Skip to content

Commit

Permalink
emit one error, or one warning, per link checker run
Browse files Browse the repository at this point in the history
  • Loading branch information
mwcz committed May 10, 2022
1 parent 4725774 commit defbd54
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 73 deletions.
40 changes: 38 additions & 2 deletions components/site/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub mod sitemap;
pub mod tpls;

use std::collections::HashMap;
use std::fmt::format;
use std::fs::remove_dir_all;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex, RwLock};
Expand Down Expand Up @@ -295,10 +296,45 @@ impl Site {
tpls::register_tera_global_fns(self);

// Needs to be done after rendering markdown as we only get the anchors at that point
link_checking::check_internal_links_with_anchors(self)?;
let internal_link_messages = link_checking::check_internal_links_with_anchors(self);

// log any broken internal links and error out if needed
if let Err(messages) = internal_link_messages {
let messages: Vec<String> = messages
.iter()
.enumerate()
.map(|(i, msg)| format!(" {}. {}", i + 1, msg))
.collect();
let msg = format!(
"Found {} broken internal anchor links\n{}",
messages.len(),
messages.join("\n")
);
match self.config.link_checker.internal_level {
config::LinkCheckerLevel::Warn => console::warn(&msg),
config::LinkCheckerLevel::Error => return Err(anyhow!(msg.clone())),
}
}

// check external links, log the results, and error out if needed
if self.config.is_in_check_mode() {
link_checking::check_external_links(self)?;
let external_link_messages = link_checking::check_external_links(self);
if let Err(messages) = external_link_messages {
let messages: Vec<String> = messages
.iter()
.enumerate()
.map(|(i, msg)| format!(" {}. {}", i + 1, msg))
.collect();
let msg = format!(
"Found {} broken external links\n{}",
messages.len(),
messages.join("\n")
);
match self.config.link_checker.external_level {
config::LinkCheckerLevel::Warn => console::warn(&msg),
config::LinkCheckerLevel::Error => return Err(anyhow!(msg.clone())),
}
}
}

Ok(())
Expand Down
132 changes: 62 additions & 70 deletions components/site/src/link_checking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use libs::url::Url;
/// is always performed (while external ones only conditionally in `zola check`). If broken links
/// are encountered, the `internal_level` setting in config.toml will determine whether they are
/// treated as warnings or errors.
pub fn check_internal_links_with_anchors(site: &Site) -> Result<()> {
pub fn check_internal_links_with_anchors(site: &Site) -> Result<(), Vec<String>> {
println!("Checking all internal links with anchors.");
let library = site.library.write().expect("Get lock for check_internal_links_with_anchors");

Expand Down Expand Up @@ -76,7 +76,7 @@ pub fn check_internal_links_with_anchors(site: &Site) -> Result<()> {
});

// Format faulty entries into error messages, and collect them.
let errors = missing_targets
let messages = missing_targets
.map(|(page_path, md_path, anchor)| {
format!(
"The anchor in the link `@/{}#{}` in {} does not exist.",
Expand All @@ -88,7 +88,7 @@ pub fn check_internal_links_with_anchors(site: &Site) -> Result<()> {
.collect::<Vec<_>>();

// Finally emit a summary, and return overall anchors-checking result.
match errors.len() {
match messages.len() {
0 => {
println!("> Successfully checked {} internal link(s) with anchors.", anchors_total);
Ok(())
Expand All @@ -98,18 +98,7 @@ pub fn check_internal_links_with_anchors(site: &Site) -> Result<()> {
"> Checked {} internal link(s) with anchors: {} target(s) missing.",
anchors_total, errors_total,
);

for err in errors.into_iter() {
match site.config.link_checker.internal_level {
LinkCheckerLevel::Error => console::error(&err),
LinkCheckerLevel::Warn => console::warn(&err),
}
}

match site.config.link_checker.internal_level {
LinkCheckerLevel::Error => Err(anyhow!("broken internal anchor links were found")),
LinkCheckerLevel::Warn => Ok(()),
}
Err(messages)
}
}
}
Expand All @@ -128,7 +117,7 @@ fn get_link_domain(link: &str) -> Result<String> {
};
}

pub fn check_external_links(site: &Site) -> Result<()> {
pub fn check_external_links(site: &Site) -> Result<(), Vec<String>> {
let library = site.library.write().expect("Get lock for check_external_links");

struct LinkDef {
Expand All @@ -143,6 +132,7 @@ pub fn check_external_links(site: &Site) -> Result<()> {
}
}

let mut messages: Vec<String> = vec![];
let mut checked_links: Vec<LinkDef> = vec![];
let mut skipped_link_count: u32 = 0;

Expand Down Expand Up @@ -210,66 +200,68 @@ pub fn check_external_links(site: &Site) -> Result<()> {
// (almost) all pages simultaneously, limiting all links for a single
// domain to one thread to avoid rate-limiting
let threads = std::cmp::min(links_by_domain.len(), 8);
let pool = rayon::ThreadPoolBuilder::new().num_threads(threads).build()?;

let errors = pool.install(|| {
links_by_domain
.par_iter()
.map(|(_domain, links)| {
let mut links_to_process = links.len();
links
.iter()
.filter_map(move |link_def| {
links_to_process -= 1;

let res = link_checker::check_url(
&link_def.external_link,
&site.config.link_checker,
);

if links_to_process > 0 {
// Prevent rate-limiting, wait before next crawl unless we're done with this domain
thread::sleep(time::Duration::from_millis(500));
}

if link_checker::is_valid(&res) {
None
} else {
Some((&link_def.file_path, &link_def.external_link, res))
}
let pool = rayon::ThreadPoolBuilder::new().num_threads(threads).build();

match pool {
Ok(pool) => {
let errors = pool.install(|| {
links_by_domain
.par_iter()
.map(|(_domain, links)| {
let mut links_to_process = links.len();
links
.iter()
.filter_map(move |link_def| {
links_to_process -= 1;

let res = link_checker::check_url(
&link_def.external_link,
&site.config.link_checker,
);

if links_to_process > 0 {
// Prevent rate-limiting, wait before next crawl unless we're done with this domain
thread::sleep(time::Duration::from_millis(500));
}

if link_checker::is_valid(&res) {
None
} else {
Some((&link_def.file_path, &link_def.external_link, res))
}
})
.collect::<Vec<_>>()
})
.flatten()
.collect::<Vec<_>>()
})
.flatten()
.collect::<Vec<_>>()
});
});

println!(
"> Checked {} external link(s): {} error(s) found.",
checked_links.len(),
errors.len()
);
println!(
"> Checked {} external link(s): {} error(s) found.",
checked_links.len(),
errors.len()
);

if errors.is_empty() {
return Ok(());
}
if errors.is_empty() {
return Ok(());
}

for (page_path, link, check_res) in errors.iter() {
let msg = format!(
"Dead link in {} to {}: {}",
page_path.to_string_lossy(),
link,
link_checker::message(check_res)
);

match site.config.link_checker.external_level {
LinkCheckerLevel::Error => todo!(),
LinkCheckerLevel::Warn => todo!(),
for (page_path, link, check_res) in errors.iter() {
messages.push(format!(
"Dead link in {} to {}: {}",
page_path.to_string_lossy(),
link,
link_checker::message(check_res)
));
}
}
Err(pool_err) => messages.push(pool_err.to_string()),
}

match site.config.link_checker.external_level {
LinkCheckerLevel::Error => Err(anyhow!("Dead links found")),
LinkCheckerLevel::Warn => Ok(()),
}
// match site.config.link_checker.external_level {
// LinkCheckerLevel::Error => Err(anyhow!("Dead links found")),
// LinkCheckerLevel::Warn => Ok(()),
// }

Err(messages)
}
2 changes: 1 addition & 1 deletion src/messages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ pub fn unravel_errors(message: &str, error: &Error) {
if !message.is_empty() {
console::error(message);
}
console::error(&format!("Error: {}", error));
console::error(&error.to_string());
let mut cause = error.source();
while let Some(e) = cause {
console::error(&format!("Reason: {}", e));
Expand Down

0 comments on commit defbd54

Please sign in to comment.