Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -356,8 +356,7 @@ Options:
Use "lychee --accept '200..=204, 429, 500' <inputs>..." to provide a comma-
separated list of accepted status codes. This example will accept 200, 201,
202, 203, 204, 429, and 500 as valid status codes.

[default: 100..=103,200..=299]
Defaults to '100..=103,200..=299' if the user provides no value.

--archive <ARCHIVE>
Specify the use of a specific web archive. Can be used in combination with `--suggest`
Expand Down
15 changes: 8 additions & 7 deletions lychee-bin/src/cache.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::time::{self, Timestamp, timestamp};
use anyhow::Result;
use dashmap::DashMap;
use lychee_lib::{CacheStatus, Status, StatusCodeExcluder, Uri};
use lychee_lib::{CacheStatus, Status, StatusCodeSelector, Uri};
use serde::{Deserialize, Serialize};
use std::path::Path;

Expand Down Expand Up @@ -36,7 +36,7 @@ pub(crate) trait StoreExt {
fn load<T: AsRef<Path>>(
path: T,
max_age_secs: u64,
excluder: &StatusCodeExcluder,
excluder: &StatusCodeSelector,
) -> Result<Cache>;
}

Expand All @@ -54,7 +54,7 @@ impl StoreExt for Cache {
fn load<T: AsRef<Path>>(
path: T,
max_age_secs: u64,
excluder: &StatusCodeExcluder,
excluder: &StatusCodeSelector,
) -> Result<Cache> {
let mut rdr = csv::ReaderBuilder::new()
.has_headers(false)
Expand Down Expand Up @@ -86,7 +86,8 @@ impl StoreExt for Cache {
#[cfg(test)]
mod tests {
use dashmap::DashMap;
use lychee_lib::{AcceptRange, CacheStatus, StatusCodeExcluder, Uri};
use http::StatusCode;
use lychee_lib::{CacheStatus, StatusCodeSelector, StatusRange, Uri};

use crate::{
cache::{Cache, CacheValue, StoreExt},
Expand All @@ -101,16 +102,16 @@ mod tests {
cache.insert(
uri.clone(),
CacheValue {
status: CacheStatus::Ok(429),
status: CacheStatus::Ok(StatusCode::TOO_MANY_REQUESTS),
timestamp: timestamp(),
},
);

let tmp = tempfile::NamedTempFile::new().unwrap();
cache.store(tmp.path()).unwrap();

let mut excluder = StatusCodeExcluder::new();
excluder.add_range(AcceptRange::new_from(400, 500).unwrap());
let mut excluder = StatusCodeSelector::empty();
excluder.add_range(StatusRange::new(400, 500).unwrap());

let cache = Cache::load(tmp.path(), u64::MAX, &excluder).unwrap();
assert!(cache.get(&uri).is_none());
Expand Down
7 changes: 6 additions & 1 deletion lychee-bin/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::options::{Config, HeaderMapExt};
use crate::parse::{parse_duration_secs, parse_remaps};
use anyhow::{Context, Result};
use http::{HeaderMap, StatusCode};
use lychee_lib::StatusCodeSelector;
use lychee_lib::{Client, ClientBuilder, ratelimit::RateLimitConfig};
use regex::RegexSet;
use reqwest_cookie_store::CookieStoreMutex;
Expand All @@ -17,7 +18,11 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
let remaps = parse_remaps(&cfg.remap)?;
let includes = RegexSet::new(&cfg.include)?;
let excludes = RegexSet::new(&cfg.exclude)?;
let accepted: HashSet<StatusCode> = cfg.accept.clone().try_into()?;
let accepted: HashSet<StatusCode> = cfg
.accept
.clone()
.unwrap_or(StatusCodeSelector::default_accepted())
.into();

// Offline mode overrides the scheme
let schemes = if cfg.offline {
Expand Down
26 changes: 16 additions & 10 deletions lychee-bin/src/commands/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ use std::sync::Mutex;
use std::time::Duration;

use futures::StreamExt;
use http::StatusCode;
use lychee_lib::StatusCodeSelector;
use lychee_lib::ratelimit::HostPool;
use reqwest::Url;
use tokio::sync::mpsc;
Expand Down Expand Up @@ -50,9 +52,13 @@ where
let cache_exclude_status = params
.cfg
.cache_exclude_status
.unwrap_or_default()
.into_set();
let accept = params.cfg.accept.into();
.unwrap_or(StatusCodeSelector::empty())
.into();
let accept = params
.cfg
.accept
.unwrap_or(StatusCodeSelector::default_accepted())
.into();

// Start receiving requests
let handle = tokio::spawn(request_channel_task(
Expand Down Expand Up @@ -185,8 +191,8 @@ async fn request_channel_task(
max_concurrency: usize,
client: Client,
cache: Cache,
cache_exclude_status: HashSet<u16>,
accept: HashSet<u16>,
cache_exclude_status: HashSet<StatusCode>,
accept: HashSet<StatusCode>,
) -> (Cache, Client) {
StreamExt::for_each_concurrent(
ReceiverStream::new(recv_req),
Expand Down Expand Up @@ -242,9 +248,9 @@ async fn check_url(client: &Client, request: Request) -> Response {
async fn handle(
client: &Client,
cache: &Cache,
cache_exclude_status: HashSet<u16>,
cache_exclude_status: HashSet<StatusCode>,
request: Result<Request, RequestError>,
accept: HashSet<u16>,
accept: HashSet<StatusCode>,
) -> Result<Response, ErrorKind> {
// Note that the RequestError cases bypass the cache.
let request = match request {
Expand Down Expand Up @@ -296,10 +302,10 @@ async fn handle(
/// - The status is unsupported.
/// - The status is unknown.
/// - The status code is excluded from the cache.
fn ignore_cache(uri: &Uri, status: &Status, cache_exclude_status: &HashSet<u16>) -> bool {
fn ignore_cache(uri: &Uri, status: &Status, cache_exclude_status: &HashSet<StatusCode>) -> bool {
let status_code_excluded = status
.code()
.is_some_and(|code| cache_exclude_status.contains(&code.as_u16()));
.is_some_and(|code| cache_exclude_status.contains(&code));

uri.is_file()
|| status.is_excluded()
Expand Down Expand Up @@ -389,7 +395,7 @@ mod tests {
#[test]
fn test_cache_ignore_excluded_status() {
// Cache is ignored for excluded status codes
let exclude = [StatusCode::OK.as_u16()].iter().copied().collect();
let exclude = HashSet::from([StatusCode::OK]);

assert!(ignore_cache(
&Uri::try_from("https://[::1]").unwrap(),
Expand Down
4 changes: 2 additions & 2 deletions lychee-bin/src/formatters/stats/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ mod tests {
fn test_markdown_response_cached_ok() {
let response = ResponseBody {
uri: Uri::try_from("http://example.com").unwrap(),
status: Status::Cached(CacheStatus::Ok(200)),
status: Status::Cached(CacheStatus::Ok(StatusCode::OK)),
};
let markdown = markdown_response(&response).unwrap();
assert_eq!(markdown, "* [200] <http://example.com/> | OK (cached)");
Expand All @@ -200,7 +200,7 @@ mod tests {
fn test_markdown_response_cached_err() {
let response = ResponseBody {
uri: Uri::try_from("http://example.com").unwrap(),
status: Status::Cached(CacheStatus::Error(Some(400))),
status: Status::Cached(CacheStatus::Error(Some(StatusCode::BAD_REQUEST))),
};
let markdown = markdown_response(&response).unwrap();
assert_eq!(markdown, "* [400] <http://example.com/> | Error (cached)");
Expand Down
6 changes: 4 additions & 2 deletions lychee-bin/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ use openssl_sys as _; // required for vendored-openssl feature
use options::{HeaderMapExt, LYCHEE_CONFIG_FILE};
use ring as _; // required for apple silicon

use lychee_lib::BasicAuthExtractor;
use lychee_lib::Collector;
use lychee_lib::CookieJar;
use lychee_lib::{BasicAuthExtractor, StatusCodeSelector};

mod cache;
mod client;
Expand Down Expand Up @@ -248,7 +248,9 @@ fn load_cache(cfg: &Config) -> Option<Cache> {
let cache = Cache::load(
LYCHEE_CACHE_FILE,
cfg.max_cache_age.as_secs(),
&cfg.cache_exclude_status.clone().unwrap_or_default(),
&cfg.cache_exclude_status
.clone()
.unwrap_or(StatusCodeSelector::empty()),
);
match cache {
Ok(cache) => Some(cache),
Expand Down
41 changes: 6 additions & 35 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use lychee_lib::ratelimit::HostConfigs;
use lychee_lib::{
Base, BasicAuthSelector, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES,
DEFAULT_RETRY_WAIT_TIME_SECS, DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT, FileExtensions,
FileType, Input, StatusCodeExcluder, StatusCodeSelector, archive::Archive,
FileType, Input, StatusCodeSelector, archive::Archive,
};
use reqwest::tls;
use secrecy::SecretString;
Expand Down Expand Up @@ -196,7 +196,6 @@ default_function! {
retry_wait_time: usize = DEFAULT_RETRY_WAIT_TIME_SECS;
method: String = DEFAULT_METHOD.to_string();
verbosity: Verbosity = Verbosity::default();
accept_selector: StatusCodeSelector = StatusCodeSelector::default();
}

// Macro for merging configuration values
Expand Down Expand Up @@ -491,7 +490,7 @@ Use \"lychee --cache-exclude-status '429, 500..502' <inputs>...\" to provide a
comma-separated list of excluded status codes. This example will not cache results
with a status code of 429, 500 and 501."
)]
pub(crate) cache_exclude_status: Option<StatusCodeExcluder>,
pub(crate) cache_exclude_status: Option<StatusCodeSelector>,

/// Don't perform any link checking.
/// Instead, dump all the links extracted from inputs that would be checked
Expand Down Expand Up @@ -726,7 +725,6 @@ Use the `hosts` option to configure headers on a per-host basis."
#[arg(
short,
long,
default_value_t,
long_help = "A List of accepted status codes for valid links

The following accept range syntax is supported: [start]..[[=]end]|code. Some valid
Expand All @@ -740,10 +738,10 @@ examples are:

Use \"lychee --accept '200..=204, 429, 500' <inputs>...\" to provide a comma-
separated list of accepted status codes. This example will accept 200, 201,
202, 203, 204, 429, and 500 as valid status codes."
202, 203, 204, 429, and 500 as valid status codes.
Defaults to '100..=103,200..=299' if the user provides no value."
)]
#[serde(default = "accept_selector")]
pub(crate) accept: StatusCodeSelector,
pub(crate) accept: Option<StatusCodeSelector>,

/// Enable the checking of fragments in links.
#[arg(long)]
Expand Down Expand Up @@ -986,7 +984,7 @@ impl Config {
..hosts,

// Keys with defaults to assign
accept: StatusCodeSelector::default(),
accept: None,
archive: None,
base: None,
base_url: None,
Expand Down Expand Up @@ -1055,33 +1053,6 @@ mod tests {

use super::*;

/// Merging a config whose `accept` was parsed from "200..=204, 429, 500"
/// into a default config must leave the merged `accept` selector containing
/// the listed codes (and the inside of the inclusive range), but not 205.
#[test]
fn test_accept_status_codes() {
    let from_file = Config {
        accept: StatusCodeSelector::from_str("200..=204, 429, 500").unwrap(),
        ..Default::default()
    };

    let mut merged = Config::default();
    merged.merge(from_file);

    // Codes that must be accepted after the merge: the single value 429 plus
    // the start, an interior value, and the inclusive end of 200..=204.
    for code in [429, 200, 203, 204] {
        assert!(merged.accept.contains(code));
    }

    // 205 lies just past the inclusive end of the range and must be rejected.
    assert!(!merged.accept.contains(205));
}

/// A default-constructed `Config` must carry the same `accept` selector as
/// the one parsed from "100..=103,200..=299".
#[test]
fn test_default() {
    let expected = StatusCodeSelector::from_str("100..=103,200..=299").expect("no error");
    let defaults = Config::default();

    assert_eq!(defaults.accept, expected);
}

#[test]
fn test_parse_custom_headers() {
assert_eq!(
Expand Down
10 changes: 5 additions & 5 deletions lychee-lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,10 @@ pub use crate::{
collector::Collector,
filter::{Excludes, Filter, Includes},
types::{
AcceptRange, AcceptRangeError, Base, BasicAuthCredentials, BasicAuthSelector, CacheStatus,
CookieJar, ErrorKind, FileExtensions, FileType, Input, InputContent, InputResolver,
InputSource, LycheeResult, Preprocessor, Redirects, Request, RequestError,
ResolvedInputSource, Response, ResponseBody, Result, Status, StatusCodeExcluder,
StatusCodeSelector, uri::raw::RawUri, uri::valid::Uri,
Base, BasicAuthCredentials, BasicAuthSelector, CacheStatus, CookieJar, ErrorKind,
FileExtensions, FileType, Input, InputContent, InputResolver, InputSource, LycheeResult,
Preprocessor, Redirects, Request, RequestError, ResolvedInputSource, Response,
ResponseBody, Result, Status, StatusCodeSelector, StatusRange, StatusRangeError,
uri::raw::RawUri, uri::valid::Uri,
},
};
Loading