From 17f62aef532fafa3245508a2c50301b68b6e5f85 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Sat, 12 Oct 2024 21:49:50 +0200 Subject: [PATCH] Respect timeout when retrieving archived link (#1526) --- lychee-bin/src/archive/mod.rs | 10 +++++++--- lychee-bin/src/archive/wayback/mod.rs | 16 +++++++++++----- lychee-bin/src/commands/check.rs | 5 ++++- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/lychee-bin/src/archive/mod.rs b/lychee-bin/src/archive/mod.rs index 767b232530..91391e6d20 100644 --- a/lychee-bin/src/archive/mod.rs +++ b/lychee-bin/src/archive/mod.rs @@ -1,6 +1,6 @@ use reqwest::{Error, Url}; use serde::{Deserialize, Serialize}; -use std::fmt::Display; +use std::{fmt::Display, time::Duration}; use strum::{Display, EnumIter, EnumString, VariantNames}; use crate::color::{color, GREEN, PINK}; @@ -32,11 +32,15 @@ pub(crate) enum Archive { } impl Archive { - pub(crate) async fn get_link(&self, original: &Url) -> Result, Error> { + pub(crate) async fn get_link( + &self, + original: &Url, + timeout: Duration, + ) -> Result, Error> { let function = match self { Archive::WaybackMachine => wayback::get_wayback_link, }; - function(original).await + function(original, timeout).await } } diff --git a/lychee-bin/src/archive/wayback/mod.rs b/lychee-bin/src/archive/wayback/mod.rs index 71cf7e0536..3dfec70bd2 100644 --- a/lychee-bin/src/archive/wayback/mod.rs +++ b/lychee-bin/src/archive/wayback/mod.rs @@ -1,17 +1,23 @@ +use std::time::Duration; + use once_cell::sync::Lazy; use serde::de::Error as SerdeError; use serde::{Deserialize, Deserializer}; use http::StatusCode; -use reqwest::{Error, Url}; +use reqwest::{Client, Error, Url}; static WAYBACK_URL: Lazy = Lazy::new(|| Url::parse("https://archive.org/wayback/available").unwrap()); -pub(crate) async fn get_wayback_link(url: &Url) -> Result, Error> { +pub(crate) async fn get_wayback_link(url: &Url, timeout: Duration) -> Result, Error> { let mut archive_url: Url = WAYBACK_URL.clone(); archive_url.set_query(Some(&format!("url={url}"))); - let response = reqwest::get(archive_url) + let response = Client::builder() + .timeout(timeout) + .build()? + .get(archive_url) + .send() .await? .json::() .await?; @@ -74,7 +80,7 @@ mod tests { // This test can be flaky, because the wayback machine does not always // return a suggestion. Retry a few times if needed. for _ in 0..3 { - match get_wayback_link(&target_url).await { + match get_wayback_link(&target_url, Duration::from_secs(20)).await { Ok(Some(suggested_url)) => { // Ensure the host is correct let host = suggested_url @@ -124,7 +130,7 @@ mod tests { .try_into() .unwrap(); - let response = get_wayback_link(url).await?; + let response = get_wayback_link(url, Duration::from_secs(20)).await?; assert_eq!(response, None); Ok(()) } diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 58efa43ba1..bae73900ed 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -17,6 +17,7 @@ use lychee_lib::{ResponseBody, Status}; use crate::archive::{Archive, Suggestion}; use crate::formatters::get_response_formatter; use crate::formatters::response::ResponseFormatter; +use crate::parse::parse_duration_secs; use crate::verbosity::Verbosity; use crate::{cache::Cache, stats::ResponseStats, ExitCode}; @@ -95,6 +96,7 @@ where &mut stats, !params.cfg.no_progress, max_concurrency, + parse_duration_secs(params.cfg.timeout), ) .await; } @@ -112,6 +114,7 @@ async fn suggest_archived_links( stats: &mut ResponseStats, show_progress: bool, max_concurrency: usize, + timeout: Duration, ) { let failed_urls = &get_failed_urls(stats); let bar = if show_progress { @@ -125,7 +128,7 @@ async fn suggest_archived_links( let suggestions = Mutex::new(&mut stats.suggestion_map); futures::stream::iter(failed_urls) - .map(|(input, url)| (input, url, archive.get_link(url))) + .map(|(input, url)| (input, url, archive.get_link(url, timeout))) .for_each_concurrent(max_concurrency, |(input, url, future)| async { if let Ok(Some(suggestion)) = future.await { suggestions