From d8b6c2b393c85c0cdcc354c6062e30a03887a766 Mon Sep 17 00:00:00 2001 From: Florian Dieminger Date: Mon, 3 Feb 2025 13:07:14 +0100 Subject: [PATCH] feat(issues): issues for ill cased links (#115) * feat(issues): issues for ill cased links Also fix casing in sidebars via fmt-sidebars * improve redirects --- crates/rari-doc/src/html/fix_link.rs | 17 +++- crates/rari-doc/src/html/links.rs | 10 ++ crates/rari-doc/src/issues.rs | 14 +++ crates/rari-doc/src/redirects.rs | 58 +++++++----- crates/rari-tools/src/sidebars.rs | 133 +++++++++++++++++---------- 5 files changed, 161 insertions(+), 71 deletions(-) diff --git a/crates/rari-doc/src/html/fix_link.rs b/crates/rari-doc/src/html/fix_link.rs index cc8bd12e..9241de01 100644 --- a/crates/rari-doc/src/html/fix_link.rs +++ b/crates/rari-doc/src/html/fix_link.rs @@ -162,8 +162,15 @@ pub fn handle_internal_link( url = original_href, ); } else { + let source = if original_href.to_lowercase() + == resolved_href.to_lowercase() + { + "ill-cased-link" + } else { + "redirected-link" + }; tracing::warn!( - source = "redirected-link", + source = source, ic = ic, line = line, col = col, @@ -181,8 +188,14 @@ pub fn handle_internal_link( if remove_href { tracing::warn!(source = "broken-link", ic = ic, url = original_href); } else { + let source = if original_href.to_lowercase() == resolved_href.to_lowercase() + { + "ill-cased-link" + } else { + "redirected-link" + }; tracing::warn!( - source = "redirected-link", + source = source, ic = ic, url = original_href, redirect = resolved_href diff --git a/crates/rari-doc/src/html/links.rs b/crates/rari-doc/src/html/links.rs index 37eca43e..5f08e866 100644 --- a/crates/rari-doc/src/html/links.rs +++ b/crates/rari-doc/src/html/links.rs @@ -6,6 +6,7 @@ use rari_types::locale::Locale; use rari_utils::concat_strs; use crate::error::DocError; +use crate::issues::get_issue_counter; use crate::pages::page::{Page, PageLike}; use crate::resolve::locale_from_url; use crate::templ::api::RariApi; @@ -102,6 +103,15 @@ pub fn render_link_via_page( } let (url, anchor) = url.split_once('#').unwrap_or((&url, "")); if let Ok(page) = RariApi::get_page(url) { + if url != page.url() && url.to_lowercase() == page.url().to_lowercase() { + let ic = get_issue_counter(); + tracing::warn!( + source = "ill-cased-link", + ic = ic, + url = url, + redirect = page.url() + ); + } let url = page.url(); let content = if let Some(content) = content { Cow::Borrowed(content) diff --git a/crates/rari-doc/src/issues.rs b/crates/rari-doc/src/issues.rs index 48e52df9..34a4e6de 100644 --- a/crates/rari-doc/src/issues.rs +++ b/crates/rari-doc/src/issues.rs @@ -204,6 +204,7 @@ pub enum IssueType { TemplBrokenLink, RedirectedLink, BrokenLink, + IllCasedLink, #[default] Unknown, } @@ -217,6 +218,7 @@ impl FromStr for IssueType { "templ-broken-link" => Self::TemplBrokenLink, "redirected-link" => Self::RedirectedLink, "broken-link" => Self::BrokenLink, + "ill-cased-link" => Self::IllCasedLink, _ => Self::Unknown, }) } @@ -309,6 +311,18 @@ impl DIssue { } } match di.name { + IssueType::IllCasedLink => { + di.fixed = false; + di.fixable = Some(true); + di.explanation = Some(format!( + "{} is ill cased", + additional.get("url").map(|s| s.as_str()).unwrap_or("?") + )); + DIssue::BrokenLink { + display_issue: di, + href: additional.remove("url"), + } + } IssueType::RedirectedLink => { di.fixed = false; di.fixable = Some(true); diff --git a/crates/rari-doc/src/redirects.rs b/crates/rari-doc/src/redirects.rs index ab88336b..146bbdc6 100644 --- a/crates/rari-doc/src/redirects.rs +++ b/crates/rari-doc/src/redirects.rs @@ -16,6 +16,7 @@ use rari_utils::error::RariIoError; use tracing::error; use crate::error::DocError; +use crate::pages::page::{Page, PageLike}; static REDIRECTS: LazyLock> = LazyLock::new(|| { let mut map = HashMap::new(); @@ -90,33 +91,48 @@ where Ok(io::BufReader::new(file).lines()) } -/// Resolves a URL redirect based on the configured redirects. +/// Resolves a given URL to a redirect URL if one exists. /// -/// This function attempts to resolve a redirect for the given URL by looking it up in the `REDIRECTS` hashmap. -/// If a redirect is found, it returns the target URL, optionally appending any hash fragment from the original URL. -/// If no redirect is found, it returns `None`. +/// Takes a URL string as input and returns an Option containing either: +/// - A `Cow` with the redirect URL if a redirect exists +/// - None if no redirect is found /// -/// # Arguments -/// -/// * `url` - A string slice that holds the URL to be resolved. -/// -/// # Returns -/// -/// * `Option>` - Returns `Some(Cow::Borrowed(target_url))` if a redirect is found and the target URL -/// does not contain a hash fragment, or `Some(Cow::Owned(format!("{target_url}{hash}")))` if the target URL -/// contains a hash fragment or the original URL has a hash fragment. Returns `None` if no redirect is found. -pub(crate) fn resolve_redirect<'a>(url: impl AsRef) -> Option> { +/// The function handles hash fragments in URLs and preserves them in the redirect. +/// It also normalizes URLs and can resolve both explicit redirects from the redirects file +/// as well as implicit redirects based on page URL normalization. +pub fn resolve_redirect<'a>(url: impl AsRef) -> Option> { let url = url.as_ref(); let hash_index = url.find('#').unwrap_or(url.len()); let (url_no_hash, hash) = (&url[..hash_index], &url[hash_index..]); - match ( - REDIRECTS - .get(&url_no_hash.to_lowercase()) - .map(|s| s.as_str()), - hash, - ) { + let redirect = match REDIRECTS + .get(&url_no_hash.to_lowercase()) + .map(|s| s.as_str()) + { + Some(redirect) if redirect.starts_with("/") => Some( + Page::from_url(redirect) + .ok() + .and_then(|page| { + if url != page.url() { + Some(Cow::Owned(page.url().to_string())) + } else { + None + } + }) + .unwrap_or(Cow::Borrowed(redirect)), + ), + Some(redirect) => Some(Cow::Borrowed(redirect)), + None if url.starts_with("/") => Page::from_url(url).ok().and_then(|page| { + if url != page.url() { + Some(Cow::Owned(page.url().to_string())) + } else { + None + } + }), + None => None, + }; + match (redirect, hash) { (None, _) => None, - (Some(url), hash) if url.contains('#') || hash.is_empty() => Some(Cow::Borrowed(url)), + (Some(url), hash) if url.contains('#') || hash.is_empty() => Some(url), (Some(url), hash) => Some(Cow::Owned(format!("{url}{hash}"))), } } diff --git a/crates/rari-tools/src/sidebars.rs b/crates/rari-tools/src/sidebars.rs index fac5b58b..5e19149c 100644 --- a/crates/rari-tools/src/sidebars.rs +++ b/crates/rari-tools/src/sidebars.rs @@ -8,6 +8,7 @@ use pretty_yaml::config::{FormatOptions, LanguageOptions}; use rari_doc::html::sidebar::{ BasicEntry, CoreEntry, Sidebar, SidebarEntry, SubPageEntry, SubPageGroupedEntry, WebExtApiEntry, }; +use rari_doc::redirects::resolve_redirect; use rari_types::globals::content_root; use rari_types::locale::{default_locale, Locale}; use rari_utils::concat_strs; @@ -21,6 +22,63 @@ static EN_US_DOCS_PREFIX: &str = concatcp!("/", default_locale().as_url_str(), " type Pair<'a> = (Cow<'a, str>, Option>); type Pairs<'a> = &'a [Pair<'a>]; +pub trait LinkFixer { + fn fix_link(&self, link: Option) -> Option; +} + +impl LinkFixer for Pairs<'_> { + fn fix_link(&self, link: Option) -> Option { + match link { + Some(link) => { + let mut has_prefix = false; + let link = if let Some(l) = link.strip_prefix(EN_US_DOCS_PREFIX) { + has_prefix = true; + l.to_string() + } else { + link + }; + for (from, to) in *self { + if link == *from { + if let Some(to) = to { + if has_prefix { + return Some(concat_strs!(EN_US_DOCS_PREFIX, to)); + } else { + return Some(to.to_string()); + } + } else { + return None; + } + } + } + Some(link) + } + None => None, + } + } +} + +pub struct CaseFixer {} +static EN_US_DOCS_SLASH_PREFIX: &str = concatcp!("/", default_locale().as_url_str(), "/docs/"); + +impl LinkFixer for CaseFixer { + fn fix_link(&self, link: Option) -> Option { + if let Some(link) = &link { + if let Some(slug) = link.strip_prefix("/") { + if let Some(redirect) = + resolve_redirect(&concat_strs!(EN_US_DOCS_SLASH_PREFIX, slug)) + { + if let Some(new_slug) = redirect.strip_prefix(EN_US_DOCS_SLASH_PREFIX) { + if new_slug != slug { + return Some(concat_strs!("/", new_slug)); + } + } + } + } + } + link + } +} + pub fn sync_sidebars() -> Result<(), ToolError> { let mut redirects = HashMap::new(); let path = redirects_path(Locale::default())?; @@ -100,9 +158,16 @@ fn consolidate_l10n(sidebar: &mut Sidebar) { } pub fn fmt_sidebars() -> Result<(), ToolError> { - for (path, mut sidebar) in read_sidebars()? { - consolidate_l10n(&mut sidebar); - write_sidebar(&sidebar, &path)?; + let fixer = CaseFixer {}; + for (path, mut parsed_sidebar) in read_sidebars()? { + consolidate_l10n(&mut parsed_sidebar); + let entries = parsed_sidebar + .sidebar + .into_iter() + .map(|entry| process_entry(entry, &fixer)) + .collect(); + parsed_sidebar.sidebar = entries; + write_sidebar(&parsed_sidebar, &path)?; } Ok(()) } @@ -130,6 +195,7 @@ pub(crate) fn update_sidebars(pairs: Pairs<'_>) -> Result<(), ToolError> { }) .collect::>>(); + let fixer: Pairs = pairs.as_slice(); // Walk the sidebars and potentially replace the links. // `process_entry`` is called recursively to process all children for (path, mut parsed_sidebar) in sidebars { @@ -138,7 +204,7 @@ pub(crate) fn update_sidebars(pairs: Pairs<'_>) -> Result<(), ToolError> { let entries = parsed_sidebar .sidebar .into_iter() - .map(|entry| process_entry(entry, pairs)) + .map(|entry| process_entry(entry, &fixer)) .collect(); // If the sidebar contents have changed, write it back to the file. @@ -201,35 +267,6 @@ fn read_sidebars() -> Result, ToolError> { .collect() } -fn replace_pairs(link: Option, pairs: Pairs<'_>) -> Option { - match link { - Some(link) => { - let mut has_prefix = false; - let link = if let Some(l) = link.strip_prefix(EN_US_DOCS_PREFIX) { - has_prefix = true; - l.to_string() - } else { - link - }; - for (from, to) in pairs { - if link == *from { - if let Some(to) = to { - if has_prefix { - return Some(concat_strs!(EN_US_DOCS_PREFIX, to)); - } else { - return Some(to.to_string()); - } - } else { - return None; - } - } - } - Some(link) - } - None => None, - } -} - fn process_basic_entry( BasicEntry { core: @@ -242,9 +279,9 @@ fn process_basic_entry( }, children, }: BasicEntry, - pairs: Pairs<'_>, + fixer: &impl LinkFixer, ) -> Option { - let new_link: Option = replace_pairs(link.clone(), pairs); + let new_link: Option = fixer.fix_link(link.clone()); if link.is_some() && new_link.is_none() { return None; } @@ -258,7 +295,7 @@ fn process_basic_entry( }, children: children .into_iter() - .map(|c| process_entry(c, pairs)) + .map(|c| process_entry(c, fixer)) .collect(), }) } @@ -278,12 +315,12 @@ fn process_sub_page_grouped_entry( include_parent, depth, }: SubPageGroupedEntry, - pairs: Pairs<'_>, + fixer: &impl LinkFixer, ) -> Option { - let new_path: String = replace_pairs(Some(path), pairs)?; + let new_path: String = fixer.fix_link(Some(path))?; Some(SubPageGroupedEntry { core: CoreEntry { - link: replace_pairs(link.clone(), pairs), + link: fixer.fix_link(link.clone()), hash, title, details, @@ -311,12 +348,12 @@ fn process_sub_page_entry( depth, nested, }: SubPageEntry, - pairs: Pairs<'_>, + fixer: &impl LinkFixer, ) -> Option { - let new_path: String = replace_pairs(Some(path), pairs)?; + let new_path: String = fixer.fix_link(Some(path))?; Some(SubPageEntry { core: CoreEntry { - link: replace_pairs(link.clone(), pairs), + link: fixer.fix_link(link.clone()), hash, title, details, @@ -330,31 +367,31 @@ fn process_sub_page_entry( }) } -fn process_entry(entry: SidebarEntry, pairs: Pairs<'_>) -> SidebarEntry { +fn process_entry(entry: SidebarEntry, fixer: &impl LinkFixer) -> SidebarEntry { match entry { SidebarEntry::Section(basic_entry) => { - if let Some(entry) = process_basic_entry(basic_entry, pairs) { + if let Some(entry) = process_basic_entry(basic_entry, fixer) { SidebarEntry::Section(entry) } else { SidebarEntry::None } } SidebarEntry::Default(basic_entry) => { - if let Some(entry) = process_basic_entry(basic_entry, pairs) { + if let Some(entry) = process_basic_entry(basic_entry, fixer) { SidebarEntry::Default(entry) } else { SidebarEntry::None } } SidebarEntry::ListSubPages(sub_page_entry) => { - if let Some(entry) = process_sub_page_entry(sub_page_entry, pairs) { + if let Some(entry) = process_sub_page_entry(sub_page_entry, fixer) { SidebarEntry::ListSubPages(entry) } else { SidebarEntry::None } } SidebarEntry::ListSubPagesGrouped(sub_page_entry) => { - if let Some(entry) = process_sub_page_grouped_entry(sub_page_entry, pairs) { + if let Some(entry) = process_sub_page_grouped_entry(sub_page_entry, fixer) { SidebarEntry::ListSubPagesGrouped(entry) } else { SidebarEntry::None @@ -362,7 +399,7 @@ fn process_entry(entry: SidebarEntry, pairs: Pairs<'_>) -> SidebarEntry { } SidebarEntry::Link(link) => { - let new_link: Option = replace_pairs(Some(link), pairs); + let new_link: Option = fixer.fix_link(Some(link)); if new_link.is_none() { return SidebarEntry::None; }