diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs
index 38f8ab0dec..4451a2240d 100644
--- a/lychee-bin/src/commands/check.rs
+++ b/lychee-bin/src/commands/check.rs
@@ -265,14 +265,13 @@ async fn handle(
             // `accepted` status codes might have changed from the previous run
             // and they may have an impact on the interpretation of the status
             // code.
+            client.host_pool().record_persistent_cache_hit(&uri);
             Status::from_cache_status(v.value().status, &accept)
         };
-        client.host_pool().record_cache_hit(&uri);
         return Ok(Response::new(uri.clone(), status, request.source.into()));
     }
 
-    client.host_pool().record_cache_miss(&uri);
     let response = check_url(client, request).await;
 
     // - Never cache filesystem access as it is fast already so caching has no benefit.
diff --git a/lychee-bin/src/formatters/host_stats/compact.rs b/lychee-bin/src/formatters/host_stats/compact.rs
index 121230e259..d312e0c60f 100644
--- a/lychee-bin/src/formatters/host_stats/compact.rs
+++ b/lychee-bin/src/formatters/host_stats/compact.rs
@@ -70,12 +70,8 @@ impl Compact {
 }
 
 impl HostStatsFormatter for Compact {
-    fn format(&self, host_stats: HashMap) -> Result> {
-        if host_stats.is_empty() {
-            return Ok(None);
-        }
-
+    fn format(&self, host_stats: HashMap) -> Result {
         let compact = CompactHostStats { host_stats };
-        Ok(Some(compact.to_string()))
+        Ok(compact.to_string())
     }
 }
diff --git a/lychee-bin/src/formatters/host_stats/detailed.rs b/lychee-bin/src/formatters/host_stats/detailed.rs
index 01bfd42bc8..f62d248138 100644
--- a/lychee-bin/src/formatters/host_stats/detailed.rs
+++ b/lychee-bin/src/formatters/host_stats/detailed.rs
@@ -79,12 +79,8 @@ impl Detailed {
 }
 
 impl HostStatsFormatter for Detailed {
-    fn format(&self, host_stats: HashMap) -> Result> {
-        if host_stats.is_empty() {
-            return Ok(None);
-        }
-
+    fn format(&self, host_stats: HashMap) -> Result {
         let detailed = DetailedHostStats { host_stats };
-        Ok(Some(detailed.to_string()))
+        Ok(detailed.to_string())
     }
 }
diff --git a/lychee-bin/src/formatters/host_stats/json.rs b/lychee-bin/src/formatters/host_stats/json.rs
index 24f7fe0d2e..fcb66b89df 100644
--- a/lychee-bin/src/formatters/host_stats/json.rs
+++ b/lychee-bin/src/formatters/host_stats/json.rs
@@ -9,17 +9,13 @@ pub(crate) struct Json;
 
 impl Json {
     pub(crate) const fn new() -> Self {
-        Self {}
+        Self
     }
 }
 
 impl HostStatsFormatter for Json {
     /// Format host stats as JSON object
-    fn format(&self, host_stats: HashMap) -> Result> {
-        if host_stats.is_empty() {
-            return Ok(None);
-        }
-
+    fn format(&self, host_stats: HashMap) -> Result {
         // Convert HostStats to a more JSON-friendly format
         let json_stats: HashMap = host_stats
             .into_iter()
@@ -50,8 +46,6 @@ impl HostStatsFormatter for Json {
             "host_statistics": json_stats
         });
 
-        serde_json::to_string_pretty(&output)
-            .map(Some)
-            .context("Cannot format host stats as JSON")
+        serde_json::to_string_pretty(&output).context("Cannot format host stats as JSON")
     }
 }
diff --git a/lychee-bin/src/formatters/host_stats/markdown.rs b/lychee-bin/src/formatters/host_stats/markdown.rs
index 8980066107..3543911a9e 100644
--- a/lychee-bin/src/formatters/host_stats/markdown.rs
+++ b/lychee-bin/src/formatters/host_stats/markdown.rs
@@ -81,12 +81,8 @@ impl Markdown {
 }
 
 impl HostStatsFormatter for Markdown {
-    fn format(&self, host_stats: HashMap) -> Result> {
-        if host_stats.is_empty() {
-            return Ok(None);
-        }
-
+    fn format(&self, host_stats: HashMap) -> Result {
         let markdown = MarkdownHostStats(host_stats);
-        Ok(Some(markdown.to_string()))
+        Ok(markdown.to_string())
     }
 }
diff --git a/lychee-bin/src/formatters/host_stats/mod.rs b/lychee-bin/src/formatters/host_stats/mod.rs
index 8c312bfdd5..205f5581d4 100644
--- a/lychee-bin/src/formatters/host_stats/mod.rs
+++ b/lychee-bin/src/formatters/host_stats/mod.rs
@@ -15,7 +15,7 @@ use std::collections::HashMap;
 /// Trait for formatting per-host statistics in different output formats
 pub(crate) trait HostStatsFormatter {
     /// Format the host statistics and return them as a string
-    fn format(&self, host_stats: HashMap) -> Result>;
+    fn format(&self, host_stats: HashMap) -> Result;
 }
 
 /// Sort host statistics by request count (descending order)
diff --git a/lychee-bin/src/host_stats.rs b/lychee-bin/src/host_stats.rs
index 5ba26d9995..a5ddbf118f 100644
--- a/lychee-bin/src/host_stats.rs
+++ b/lychee-bin/src/host_stats.rs
@@ -11,17 +11,16 @@ pub(crate) fn output_per_host_statistics(host_pool: &HostPool, config: &Config)
     let host_stats = host_pool.all_host_stats();
     let host_stats_formatter = get_host_stats_formatter(&config.format, &config.mode);
+    let formatted_host_stats = host_stats_formatter.format(host_stats)?;
 
-    if let Some(formatted_host_stats) = host_stats_formatter.format(host_stats)? {
-        if let Some(output) = &config.output {
-            // For file output, append to the existing output
-            let mut file_content = std::fs::read_to_string(output).unwrap_or_default();
-            file_content.push_str(&formatted_host_stats);
-            std::fs::write(output, file_content)
-                .context("Cannot write host stats to output file")?;
-        } else {
-            print!("{formatted_host_stats}");
-        }
+    if let Some(output) = &config.output {
+        // For file output, append to the existing output
+        let mut file_content = std::fs::read_to_string(output).unwrap_or_default();
+        file_content.push_str(&formatted_host_stats);
+        std::fs::write(output, file_content).context("Cannot write host stats to output file")?;
+    } else {
+        print!("{formatted_host_stats}");
     }
+
     Ok(())
 }
diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs
index c0cae571d0..8fa5b95542 100644
--- a/lychee-bin/tests/cli.rs
+++ b/lychee-bin/tests/cli.rs
@@ -1482,6 +1482,51 @@ The config file should contain every possible key for documentation purposes."
         Ok(())
     }
 
+    #[tokio::test]
+    async fn test_process_internal_host_caching() -> Result<()> {
+        // Note that this process-internal per-host caching
+        // has no direct relation to the lychee cache file
+        // where state can be persisted between multiple invocations.
+        let server = wiremock::MockServer::start().await;
+
+        // Return one rate-limited response to make sure that
+        // such a response isn't cached.
+        wiremock::Mock::given(wiremock::matchers::method("GET"))
+            .respond_with(ResponseTemplate::new(429))
+            .up_to_n_times(1)
+            .mount(&server)
+            .await;
+
+        wiremock::Mock::given(wiremock::matchers::method("GET"))
+            .respond_with(ResponseTemplate::new(200))
+            .expect(1)
+            .mount(&server)
+            .await;
+
+        let temp_dir = tempfile::tempdir()?;
+        for i in 0..9 {
+            let test_md1 = temp_dir.path().join(format!("test{i}.md"));
+            fs::write(&test_md1, server.uri())?;
+        }
+
+        cargo_bin_cmd!()
+            .arg(temp_dir.path())
+            .arg("--host-stats")
+            .assert()
+            .success()
+            .stdout(contains("9 Total"))
+            .stdout(contains("9 OK"))
+            .stdout(contains("0 Errors"))
+            // Per-host statistics
+            // 1 rate limited + 9 OK
+            .stdout(contains("10 reqs"))
+            // 1 rate limited, 1 OK, 8 cached
+            .stdout(contains("80.0% cached"));
+
+        server.verify().await;
+        Ok(())
+    }
+
     #[test]
     fn test_verbatim_skipped_by_default() {
         let input = fixtures_path!().join("TEST_CODE_BLOCKS.md");
diff --git a/lychee-lib/src/checker/website.rs b/lychee-lib/src/checker/website.rs
index 6a00915e46..c490fc22b3 100644
--- a/lychee-lib/src/checker/website.rs
+++ b/lychee-lib/src/checker/website.rs
@@ -2,7 +2,7 @@ use crate::{
     BasicAuthCredentials, ErrorKind, FileType, Status, Uri,
     chain::{Chain, ChainResult, ClientRequestChains, Handler, RequestChain},
     quirks::Quirks,
-    ratelimit::HostPool,
+    ratelimit::{CacheableResponse, HostPool},
     retry::RetryExt,
     types::{redirect_history::RedirectHistory, uri::github::GithubUri},
     utils::fragment_checker::{FragmentChecker, FragmentInput},
@@ -10,7 +10,7 @@ use crate::{
 use async_trait::async_trait;
 use http::{Method, StatusCode};
 use octocrab::Octocrab;
-use reqwest::{Request, Response, header::CONTENT_TYPE};
+use reqwest::{Request, header::CONTENT_TYPE};
 use std::{collections::HashSet, path::Path, sync::Arc, time::Duration};
 use url::Url;
 
@@ -127,12 +127,12 @@ impl WebsiteChecker {
         // when `accept=200,429`, `status_code=429` will be treated as success
         // but we are not able the check the fragment since it's inapplicable.
         if self.include_fragments
-            && response.status().is_success()
+            && response.status.is_success()
             && method == Method::GET
             && request_url.fragment().is_some_and(|x| !x.is_empty())
         {
             let Some(content_type) = response
-                .headers()
+                .headers
                 .get(CONTENT_TYPE)
                 .and_then(|header| header.to_str().ok())
             else {
@@ -143,7 +143,7 @@ impl WebsiteChecker {
             ct if ct.starts_with("text/html") => FileType::Html,
             ct if ct.starts_with("text/markdown") => FileType::Markdown,
             ct if ct.starts_with("text/plain") => {
-                let path = Path::new(response.url().path());
+                let path = Path::new(response.url.path());
                 match path.extension() {
                     Some(ext) if ext.eq_ignore_ascii_case("md") => FileType::Markdown,
                     _ => return status,
@@ -169,22 +169,18 @@ impl WebsiteChecker {
         &self,
         url: Url,
         status: Status,
-        response: Response,
+        response: CacheableResponse,
         file_type: FileType,
     ) -> Status {
-        match response.text().await {
-            Ok(content) => {
-                match self
-                    .fragment_checker
-                    .check(FragmentInput { content, file_type }, &url)
-                    .await
-                {
-                    Ok(true) => status,
-                    Ok(false) => Status::Error(ErrorKind::InvalidFragment(url.into())),
-                    Err(e) => Status::Error(e),
-                }
-            }
-            Err(e) => Status::Error(ErrorKind::ReadResponseBody(e)),
+        let content = response.text;
+        match self
+            .fragment_checker
+            .check(FragmentInput { content, file_type }, &url)
+            .await
+        {
+            Ok(true) => status,
+            Ok(false) => Status::Error(ErrorKind::InvalidFragment(url.into())),
+            Err(e) => Status::Error(e),
         }
     }
diff --git a/lychee-lib/src/ratelimit/host/host.rs b/lychee-lib/src/ratelimit/host/host.rs
index 8e1c1977ac..e5cbe8ff96 100644
--- a/lychee-lib/src/ratelimit/host/host.rs
+++ b/lychee-lib/src/ratelimit/host/host.rs
@@ -1,21 +1,25 @@
-use crate::ratelimit::headers;
+use crate::{
+    ratelimit::{CacheableResponse, headers},
+    retry::RetryExt,
+};
 use dashmap::DashMap;
 use governor::{
     Quota, RateLimiter,
     clock::DefaultClock,
     state::{InMemoryState, NotKeyed},
 };
+use http::StatusCode;
 use humantime_serde::re::humantime::format_duration;
 use log::warn;
-use reqwest::{Client as ReqwestClient, Request, Response};
+use reqwest::{Client as ReqwestClient, Request, Response as ReqwestResponse};
 use std::time::{Duration, Instant};
 use std::{num::NonZeroU32, sync::Mutex};
 use tokio::sync::Semaphore;
 
 use super::key::HostKey;
 use super::stats::HostStats;
+use crate::Uri;
 use crate::types::Result;
-use crate::{CacheStatus, Status, Uri};
 use crate::{
     ErrorKind,
     ratelimit::{HostConfig, RateLimitConfig},
@@ -24,22 +28,8 @@ use crate::{
 /// Cap maximum backoff duration to reasonable limits
 const MAXIMUM_BACKOFF: Duration = Duration::from_secs(60);
 
-/// Cache value for per-host caching
-#[derive(Debug, Clone)]
-struct HostCacheValue {
-    status: CacheStatus,
-}
-
-impl From<&Status> for HostCacheValue {
-    fn from(status: &Status) -> Self {
-        Self {
-            status: status.into(),
-        }
-    }
-}
-
 /// Per-host cache for storing request results
-type HostCache = DashMap;
+type HostCache = DashMap;
 
 /// Represents a single host with its own rate limiting, concurrency control,
 /// HTTP client configuration, and request cache.
@@ -70,7 +60,8 @@ pub struct Host {
     /// Current backoff duration for adaptive rate limiting
     backoff_duration: Mutex,
 
-    /// Per-host cache to prevent duplicate requests
+    /// Per-host cache to prevent duplicate requests during a single link check invocation.
+    /// Note that this cache has no direct relation to the inter-process persistable [`crate::CacheStatus`].
     cache: HostCache,
 }
 
@@ -108,40 +99,28 @@ impl Host {
     /// # Panics
     ///
     /// Panics if the statistics mutex is poisoned
-    pub fn get_cached_status(&self, uri: &Uri) -> Option {
-        if let Some(entry) = self.cache.get(uri) {
-            // Cache hit
-            self.stats.lock().unwrap().record_cache_hit();
-            return Some(entry.status);
-        }
+    fn get_cached_status(&self, uri: &Uri) -> Option {
+        self.cache.get(uri).map(|v| v.clone())
+    }
+
+    fn record_cache_hit(&self) {
+        self.stats.lock().unwrap().record_cache_hit();
+    }
 
-        // Cache miss
+    fn record_cache_miss(&self) {
         self.stats.lock().unwrap().record_cache_miss();
-        None
     }
 
     /// Cache a request result
-    pub fn cache_result(&self, uri: &Uri, status: &Status) {
-        let cache_value = HostCacheValue::from(status);
-        self.cache.insert(uri.clone(), cache_value);
+    fn cache_result(&self, uri: &Uri, response: CacheableResponse) {
+        // Do not cache responses that are potentially retried
+        if !response.status.should_retry() {
+            self.cache.insert(uri.clone(), response);
+        }
     }
 
     /// Execute a request with rate limiting, concurrency control, and caching
     ///
-    /// This method:
-    /// 1. Checks the per-host cache for existing results
-    /// 2. If not cached, acquires a semaphore permit for concurrency control
-    /// 3. Waits for rate limiter permission
-    /// 4. Applies adaptive backoff if needed
-    /// 5. Executes the request
-    /// 6. Updates statistics based on response
-    /// 7. Parses rate limit headers to adjust future behavior
-    /// 8. Caches the result for future use
-    ///
-    /// # Arguments
-    ///
-    /// * `request` - The HTTP request to execute
-    ///
     /// # Errors
     ///
     /// Returns an error if the request fails or rate limiting is exceeded
@@ -149,39 +128,35 @@ impl Host {
     /// # Panics
     ///
     /// Panics if the statistics mutex is poisoned
-    pub async fn execute_request(&self, request: Request) -> Result {
+    pub(crate) async fn execute_request(&self, request: Request) -> Result {
         let uri = Uri::from(request.url().clone());
+        let _permit = self.acquire_semaphore().await;
 
-        // Note: Cache checking is handled at the HostPool level
-        // This method focuses on executing the actual HTTP request
-
-        // Acquire semaphore permit for concurrency control
-        let _permit = self
-            .semaphore
-            .acquire()
-            .await
-            // SAFETY: this should not panic as we never close the semaphore
-            .expect("Semaphore was closed unexpectedly");
-
-        // Apply adaptive backoff if needed
-        let backoff_duration = {
-            let backoff = self.backoff_duration.lock().unwrap();
-            *backoff
-        };
-        if !backoff_duration.is_zero() {
-            log::debug!(
-                "Host {} applying backoff delay of {}ms due to previous rate limiting or errors",
-                self.key,
-                backoff_duration.as_millis()
-            );
-            tokio::time::sleep(backoff_duration).await;
+        if let Some(cached) = self.get_cached_status(&uri) {
+            self.record_cache_hit();
+            return Ok(cached);
         }
 
+        self.await_backoff().await;
+
         if let Some(rate_limiter) = &self.rate_limiter {
             rate_limiter.until_ready().await;
         }
 
-        // Execute the request and track timing
+        if let Some(cached) = self.get_cached_status(&uri) {
+            self.record_cache_hit();
+            return Ok(cached);
+        }
+
+        self.record_cache_miss();
+        self.perform_request(request, uri).await
+    }
+
+    pub(crate) const fn get_client(&self) -> &ReqwestClient {
+        &self.client
+    }
+
+    async fn perform_request(&self, request: Request, uri: Uri) -> Result {
         let start_time = Instant::now();
         let response = match self.client.execute(request).await {
             Ok(response) => response,
             Err(e) => {
                 return Err(ErrorKind::NetworkRequest(e));
             }
         };
-        let request_time = start_time.elapsed();
 
-        // Update statistics based on response
-        let status_code = response.status().as_u16();
-        self.update_stats_and_backoff(status_code, request_time);
-
-        // Parse rate limit headers to adjust behavior
+        self.update_stats(response.status(), start_time.elapsed());
+        self.update_backoff(response.status());
         self.handle_rate_limit_headers(&response);
 
-        // Cache the result
-        let status = Status::Ok(response.status());
-        self.cache_result(&uri, &status);
-
+        let response = CacheableResponse::try_from(response).await?;
+        self.cache_result(&uri, response.clone());
         Ok(response)
     }
 
-    pub(crate) const fn get_client(&self) -> &ReqwestClient {
-        &self.client
+    /// Await adaptive backoff if needed
+    async fn await_backoff(&self) {
+        let backoff_duration = {
+            let backoff = self.backoff_duration.lock().unwrap();
+            *backoff
+        };
+        if !backoff_duration.is_zero() {
+            log::debug!(
+                "Host {} applying backoff delay of {}ms due to previous rate limiting or errors",
+                self.key,
+                backoff_duration.as_millis()
+            );
+            tokio::time::sleep(backoff_duration).await;
+        }
     }
 
-    /// Update internal statistics and backoff based on the response
-    fn update_stats_and_backoff(&self, status_code: u16, request_time: Duration) {
-        // Update statistics
-        {
-            let mut stats = self.stats.lock().unwrap();
-            stats.record_response(status_code, request_time);
-        }
+    async fn acquire_semaphore(&self) -> tokio::sync::SemaphorePermit<'_> {
+        self.semaphore
+            .acquire()
+            .await
+            // SAFETY: this should not panic as we never close the semaphore
+            .expect("Semaphore was closed unexpectedly")
+    }
 
-        // Update backoff duration based on response
-        {
-            let mut backoff = self.backoff_duration.lock().unwrap();
-            match status_code {
-                200..=299 => {
-                    // Reset backoff on success
-                    *backoff = Duration::from_millis(0);
-                }
-                429 => {
-                    // Exponential backoff on rate limit, capped at 30 seconds
-                    let new_backoff = std::cmp::min(
-                        if backoff.is_zero() {
-                            Duration::from_millis(500)
-                        } else {
-                            *backoff * 2
-                        },
-                        Duration::from_secs(30),
-                    );
-                    log::debug!(
-                        "Host {} hit rate limit (429), increasing backoff from {}ms to {}ms",
-                        self.key,
-                        backoff.as_millis(),
-                        new_backoff.as_millis()
-                    );
-                    *backoff = new_backoff;
-                }
-                500..=599 => {
-                    // Moderate backoff increase on server errors, capped at 10 seconds
-                    *backoff = std::cmp::min(
-                        *backoff + Duration::from_millis(200),
-                        Duration::from_secs(10),
-                    );
-                }
-                _ => {} // No backoff change for other status codes
+    fn update_backoff(&self, status: StatusCode) {
+        let mut backoff = self.backoff_duration.lock().unwrap();
+        match status.as_u16() {
+            200..=299 => {
+                // Reset backoff on success
+                *backoff = Duration::from_millis(0);
             }
+            429 => {
+                // Exponential backoff on rate limit, capped at 30 seconds
+                let new_backoff = std::cmp::min(
+                    if backoff.is_zero() {
+                        Duration::from_millis(500)
+                    } else {
+                        *backoff * 2
+                    },
+                    Duration::from_secs(30),
+                );
+                log::debug!(
+                    "Host {} hit rate limit (429), increasing backoff from {}ms to {}ms",
+                    self.key,
+                    backoff.as_millis(),
+                    new_backoff.as_millis()
+                );
+                *backoff = new_backoff;
+            }
+            500..=599 => {
+                // Moderate backoff increase on server errors, capped at 10 seconds
+                *backoff = std::cmp::min(
+                    *backoff + Duration::from_millis(200),
+                    Duration::from_secs(10),
+                );
+            }
+            _ => {} // No backoff change for other status codes
         }
     }
 
+    fn update_stats(&self, status: StatusCode, request_time: Duration) {
+        self.stats
+            .lock()
+            .unwrap()
+            .record_response(status.as_u16(), request_time);
+    }
+
     /// Parse rate limit headers from response and adjust behavior
-    fn handle_rate_limit_headers(&self, response: &Response) {
+    fn handle_rate_limit_headers(&self, response: &ReqwestResponse) {
         // Implement basic parsing here rather than using the rate-limits crate to keep dependencies minimal
         let headers = response.headers();
         self.handle_retry_after_header(headers);
@@ -321,27 +307,10 @@ impl Host {
         self.stats.lock().unwrap().clone()
     }
 
-    /// Record a cache hit from the persistent disk cache
-    ///
-    /// # Panics
-    ///
-    /// Panics if the statistics mutex is poisoned
-    pub fn record_persistent_cache_hit(&self) {
-        self.stats.lock().unwrap().record_cache_hit();
-    }
-
-    /// Record a cache miss from the persistent disk cache
-    ///
-    /// # Panics
-    ///
-    /// Panics if the statistics mutex is poisoned
-    pub fn record_persistent_cache_miss(&self) {
-        self.stats.lock().unwrap().record_cache_miss();
-    }
-
-    /// Get the current number of available permits (concurrent request slots)
-    pub fn available_permits(&self) -> usize {
-        self.semaphore.available_permits()
+    /// Record a cache hit from the persistent disk cache.
+    /// Cache misses are tracked internally, so we don't expose such a method.
+    pub(crate) fn record_persistent_cache_hit(&self) {
+        self.record_cache_hit();
     }
 
     /// Get the current cache size (number of cached entries)
@@ -365,7 +334,7 @@ mod tests {
         let host = Host::new(key.clone(), &host_config, &global_config, Client::default());
 
         assert_eq!(host.key, key);
-        assert_eq!(host.available_permits(), 10); // Default concurrency
+        assert_eq!(host.semaphore.available_permits(), 10); // Default concurrency
         assert!((host.stats().success_rate() - 1.0).abs() < f64::EPSILON);
         assert_eq!(host.cache_size(), 0);
     }
diff --git a/lychee-lib/src/ratelimit/mod.rs b/lychee-lib/src/ratelimit/mod.rs
index ad4cb48551..fac500207b 100644
--- a/lychee-lib/src/ratelimit/mod.rs
+++ b/lychee-lib/src/ratelimit/mod.rs
@@ -19,4 +19,36 @@ mod pool;
 
 pub use config::{HostConfig, HostConfigs, RateLimitConfig};
 pub use host::{Host, HostKey, HostStats};
+use http::HeaderMap;
 pub use pool::{ClientMap, HostPool};
+use reqwest::Response;
+use url::Url;
+
+use crate::{ErrorKind, Result};
+
+/// The result of a HTTP request, used for internal per-host caching.
+/// This abstraction exists, because [`Response`] cannot easily be cached
+/// since it does not implement [`Clone`].
+#[derive(Debug, Clone)]
+pub(crate) struct CacheableResponse {
+    pub(crate) status: reqwest::StatusCode,
+    pub(crate) text: String,
+    pub(crate) headers: HeaderMap,
+    pub(crate) url: Url,
+}
+
+impl CacheableResponse {
+    async fn try_from(response: Response) -> Result {
+        let status = response.status();
+        let headers = response.headers().clone();
+        let url = response.url().clone();
+        let text = response.text().await.map_err(ErrorKind::ReadResponseBody)?;
+
+        Ok(Self {
+            status,
+            text,
+            headers,
+            url,
+        })
+    }
+}
diff --git a/lychee-lib/src/ratelimit/pool.rs b/lychee-lib/src/ratelimit/pool.rs
index 9ea7700803..df6d5027dc 100644
--- a/lychee-lib/src/ratelimit/pool.rs
+++ b/lychee-lib/src/ratelimit/pool.rs
@@ -1,12 +1,12 @@
 use dashmap::DashMap;
 use http::Method;
-use reqwest::{Client, Request, Response};
+use reqwest::{Client, Request};
 use std::collections::HashMap;
 use std::sync::Arc;
 
-use crate::ratelimit::{self, Host, HostConfigs, HostKey, HostStats, RateLimitConfig};
+use crate::ratelimit::{CacheableResponse, Host, HostConfigs, HostKey, HostStats, RateLimitConfig};
 use crate::types::Result;
-use crate::{CacheStatus, ErrorKind, Status, Uri};
+use crate::{ErrorKind, Uri};
 
 /// Keep track of host-specific [`reqwest::Client`]s
 pub type ClientMap = HashMap;
@@ -65,23 +65,7 @@ impl HostPool {
     /// Fails if:
     /// - The request URL has no valid hostname
     /// - The underlying HTTP request fails
-    ///
-    /// # Examples
-    ///
-    /// ```no_run
-    /// # use lychee_lib::ratelimit::{HostPool, RateLimitConfig};
-    /// # use std::collections::HashMap;
-    /// # use reqwest::{Request, header::HeaderMap};
-    /// # use std::time::Duration;
-    /// # #[tokio::main]
-    /// # async fn main() -> Result<(), Box> {
-    /// let pool = HostPool::default();
-    /// let request = reqwest::Request::new(reqwest::Method::GET, "https://example.com".parse()?);
-    /// let response = pool.execute_request(request).await?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    pub async fn execute_request(&self, request: Request) -> Result {
+    pub(crate) async fn execute_request(&self, request: Request) -> Result {
         let url = request.url();
         let host_key = HostKey::try_from(url)?;
         let host = self.get_or_create_host(host_key);
@@ -187,33 +171,6 @@ impl HostPool {
         self.hosts.remove(&host_key).is_some()
     }
 
-    /// Check if a URI is cached in the appropriate host's cache
-    ///
-    /// # Returns
-    ///
-    /// Returns the cached status if found and valid, `None` otherwise
-    #[must_use]
-    pub fn get_cached_status(&self, uri: &Uri) -> Option {
-        let host_key = HostKey::try_from(uri).ok()?;
-
-        if let Some(host) = self.hosts.get(&host_key) {
-            host.get_cached_status(uri)
-        } else {
-            None
-        }
-    }
-
-    /// Cache a result for a URI in the appropriate host's cache
-    pub fn cache_result(&self, uri: &Uri, status: &Status) {
-        if let Ok(host_key) = HostKey::try_from(uri)
-            && let Some(host) = self.hosts.get(&host_key)
-        {
-            host.cache_result(uri, status);
-        }
-        // If host doesn't exist yet, we don't cache
-        // The result will be cached when the host is created and the request is made
-    }
-
     /// Get cache statistics across all hosts
     #[must_use]
     pub fn cache_stats(&self) -> HashMap {
@@ -228,34 +185,16 @@ impl HostPool {
             .collect()
     }
 
-    /// Try to record a cache hit for the given URI in host statistics.
-    /// File and mail URIs are ignored.
-    ///
-    /// This tracks that a request was served from the persistent disk cache
-    /// rather than going through the rate-limited HTTP request flow.
-    /// This method will create a host instance if one doesn't exist yet.
-    pub fn record_cache_hit(&self, uri: &crate::Uri) {
-        self.record_cache_event(uri, |host| host.record_persistent_cache_hit());
-    }
-
-    /// Try to record a cache miss for the given URI in host statistics
-    /// File and mail URIs are ignored.
-    ///
-    /// This tracks that a request could not be served from the persistent disk cache
-    /// and will need to go through the rate-limited HTTP request flow.
-    /// This method will create a Host instance if one doesn't exist yet.
-    pub fn record_cache_miss(&self, uri: &Uri) {
-        self.record_cache_event(uri, |host| host.record_persistent_cache_miss());
-    }
-
-    /// File and mail URIs are ignored.
-    /// Errors are logged and not returned.
-    fn record_cache_event)>(&self, uri: &Uri, action: F) {
+    /// Record a cache hit for the given URI in host statistics.
+    /// This tracks that a request was served from the persistent disk cache.
+    /// Note that no equivalent function for tracking cache misses is exposed,
+    /// since this is handled internally.
+    pub fn record_persistent_cache_hit(&self, uri: &crate::Uri) {
         if !uri.is_file() && !uri.is_mail() {
-            match ratelimit::HostKey::try_from(uri) {
+            match crate::ratelimit::HostKey::try_from(uri) {
                 Ok(key) => {
                     let host = self.get_or_create_host(key);
-                    action(host);
+                    host.record_persistent_cache_hit();
                 }
                 Err(e) => {
                     log::debug!("Failed to record cache hit for {uri}: {e}");
diff --git a/lychee-lib/src/retry.rs b/lychee-lib/src/retry.rs
index 8bf0ec7d10..595cd08df5 100644
--- a/lychee-lib/src/retry.rs
+++ b/lychee-lib/src/retry.rs
@@ -18,21 +18,10 @@ pub(crate) trait RetryExt {
 
 impl RetryExt for reqwest::StatusCode {
     /// Try to map a `reqwest` response into `Retryable`.
-    #[allow(clippy::if_same_then_else)]
     fn should_retry(&self) -> bool {
-        let status = *self;
-        if status.is_server_error() {
-            true
-        } else if status.is_client_error()
-            && status != StatusCode::REQUEST_TIMEOUT
-            && status != StatusCode::TOO_MANY_REQUESTS
-        {
-            false
-        } else if status.is_success() {
-            false
-        } else {
-            status == StatusCode::REQUEST_TIMEOUT || status == StatusCode::TOO_MANY_REQUESTS
-        }
+        self.is_server_error()
+            || self == &StatusCode::REQUEST_TIMEOUT
+            || self == &StatusCode::TOO_MANY_REQUESTS
     }
 }
diff --git a/lychee-lib/src/types/status.rs b/lychee-lib/src/types/status.rs
index f246c0bd34..35577ec4f3 100644
--- a/lychee-lib/src/types/status.rs
+++ b/lychee-lib/src/types/status.rs
@@ -4,8 +4,8 @@ use super::CacheStatus;
 use super::redirect_history::Redirects;
 use crate::ErrorKind;
 use crate::RequestError;
+use crate::ratelimit::CacheableResponse;
 use http::StatusCode;
-use reqwest::Response;
 use serde::ser::SerializeStruct;
 use serde::{Serialize, Serializer};
 
@@ -92,13 +92,12 @@ impl Serialize for Status {
 impl Status {
     #[must_use]
     /// Create a status object from a response and the set of accepted status codes
-    pub fn new(response: &Response, accepted: &HashSet) -> Self {
-        let code = response.status();
-
-        if accepted.contains(&code) {
-            Self::Ok(code)
+    pub(crate) fn new(response: &CacheableResponse, accepted: &HashSet) -> Self {
+        let status = response.status;
+        if accepted.contains(&status) {
+            Self::Ok(status)
         } else {
-            Self::Error(ErrorKind::RejectedStatusCode(code))
+            Self::Error(ErrorKind::RejectedStatusCode(status))
         }
     }
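
For reviewers: the rewritten `RetryExt::should_retry` in `lychee-lib/src/retry.rs` collapses the old branching into a single predicate, retrying on any server error (5xx), on 408 (Request Timeout), and on 429 (Too Many Requests). Below is a minimal standalone sketch of that logic as a free function (hypothetical name, assuming only the `http` crate), useful for sanity-checking the simplification against the old branches:

```rust
use http::StatusCode;

// Sketch of the simplified retry predicate: retry on server errors (5xx),
// request timeouts (408), and rate limiting (429); everything else is final.
fn should_retry(status: StatusCode) -> bool {
    status.is_server_error()
        || status == StatusCode::REQUEST_TIMEOUT
        || status == StatusCode::TOO_MANY_REQUESTS
}

fn main() {
    // Retried: these are also the responses that the new `Host::cache_result`
    // refuses to put into the per-host cache.
    assert!(should_retry(StatusCode::TOO_MANY_REQUESTS));
    assert!(should_retry(StatusCode::REQUEST_TIMEOUT));
    assert!(should_retry(StatusCode::BAD_GATEWAY));
    // Not retried: success and ordinary client errors are final.
    assert!(!should_retry(StatusCode::OK));
    assert!(!should_retry(StatusCode::NOT_FOUND));
}
```

Because 429 responses are retryable and therefore never cached, the single rate-limited response in `test_process_internal_host_caching` is followed by a real second request, which is what the `10 reqs` / `80.0% cached` assertions exercise.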