Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 56 additions & 5 deletions lychee-bin/src/cache.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::time::{self, Timestamp, timestamp};
use anyhow::Result;
use dashmap::DashMap;
use lychee_lib::{CacheStatus, Status, Uri};
use lychee_lib::{CacheStatus, Status, StatusCodeExcluder, Uri};
use serde::{Deserialize, Serialize};
use std::path::Path;

Expand Down Expand Up @@ -33,7 +33,11 @@ pub(crate) trait StoreExt {
fn store<T: AsRef<Path>>(&self, path: T) -> Result<()>;

/// Load cache from path. Discard entries older than `max_age_secs` and
/// entries whose status code is excluded by `excluder`.
fn load<T: AsRef<Path>>(path: T, max_age_secs: u64) -> Result<Cache>;
fn load<T: AsRef<Path>>(
path: T,
max_age_secs: u64,
excluder: &StatusCodeExcluder,
) -> Result<Cache>;
}

impl StoreExt for Cache {
Expand All @@ -47,7 +51,11 @@ impl StoreExt for Cache {
Ok(())
}

fn load<T: AsRef<Path>>(path: T, max_age_secs: u64) -> Result<Cache> {
fn load<T: AsRef<Path>>(
path: T,
max_age_secs: u64,
excluder: &StatusCodeExcluder,
) -> Result<Cache> {
let mut rdr = csv::ReaderBuilder::new()
.has_headers(false)
.from_path(path)?;
Expand All @@ -58,10 +66,53 @@ impl StoreExt for Cache {
let (uri, value): (Uri, CacheValue) = result?;
// Discard entries older than `max_age_secs`.
// This allows gradually updating the cache over multiple runs.
if current_ts - value.timestamp < max_age_secs {
map.insert(uri, value);
if current_ts - value.timestamp >= max_age_secs {
continue;
}

// Discard entries for status codes which have been excluded.
// Without this check, an entry might be cached, then its status code is configured as
// excluded, and in subsequent runs the cached value is still reused.
if value.status.is_excluded(excluder) {
continue;
}

map.insert(uri, value);
}
Ok(map)
}
}

#[cfg(test)]
mod tests {
    use dashmap::DashMap;
    use lychee_lib::{AcceptRange, CacheStatus, StatusCodeExcluder, Uri};

    use crate::{
        cache::{Cache, CacheValue, StoreExt},
        time::timestamp,
    };

    /// A cached entry whose status code is covered by the excluder must be
    /// dropped when the cache is loaded back from disk.
    #[test]
    fn test_excluded_status_not_reused_from_cache() {
        // Excluder configured with the range built from 400 and 500; the
        // cached status code used below (429) lies inside it.
        let mut excluder = StatusCodeExcluder::new();
        excluder.add_range(AcceptRange::new_from(400, 500).unwrap());

        let uri = Uri::try_from("https://example.com").unwrap();
        let entry = CacheValue {
            status: CacheStatus::Ok(429),
            timestamp: timestamp(),
        };

        // Persist a cache holding exactly that one entry.
        let stored: Cache = DashMap::<Uri, CacheValue>::new();
        stored.insert(uri.clone(), entry);

        let cache_file = tempfile::NamedTempFile::new().unwrap();
        stored.store(cache_file.path()).unwrap();

        // `u64::MAX` as the maximum age keeps the age check from discarding
        // the fresh entry, so only the exclusion check can remove it.
        let reloaded = Cache::load(cache_file.path(), u64::MAX, &excluder).unwrap();
        assert!(reloaded.get(&uri).is_none());
    }
}
6 changes: 5 additions & 1 deletion lychee-bin/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,11 @@ fn load_cache(cfg: &Config) -> Option<Cache> {
}
}

let cache = Cache::load(LYCHEE_CACHE_FILE, cfg.max_cache_age.as_secs());
let cache = Cache::load(
LYCHEE_CACHE_FILE,
cfg.max_cache_age.as_secs(),
&cfg.cache_exclude_status,
);
match cache {
Ok(cache) => Some(cache),
Err(e) => {
Expand Down
23 changes: 22 additions & 1 deletion lychee-lib/src/types/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::fmt::Display;

use serde::{Deserialize, Deserializer, Serialize};

use crate::{ErrorKind, Status};
use crate::{ErrorKind, Status, StatusCodeExcluder};

/// Representation of the status of a cached request. This is kept simple on
/// purpose because the type gets serialized to a cache file and might need to
Expand Down Expand Up @@ -89,6 +89,27 @@ impl From<&Status> for CacheStatus {
}
}

impl From<CacheStatus> for Option<u16> {
fn from(val: CacheStatus) -> Self {
match val {
CacheStatus::Ok(status) => Some(status),
CacheStatus::Error(status) => status,
_ => None,
}
}
}

impl CacheStatus {
    /// Reports whether this cache status carries a status code that the given
    /// [`StatusCodeExcluder`] contains.
    ///
    /// A status without a code is never considered excluded.
    #[must_use]
    pub fn is_excluded(&self, excluder: &StatusCodeExcluder) -> bool {
        let code: Option<u16> = (*self).into();
        code.is_some_and(|code| excluder.contains(code))
    }
}

#[cfg(test)]
mod tests {
use serde::Deserialize;
Expand Down