Skip to content

Commit 21a7f18

Browse files
committed
fix(links): fall back to en-us
1 parent cdf0993 commit 21a7f18

File tree

8 files changed

+243
-123
lines changed

8 files changed

+243
-123
lines changed

Diff for: crates/rari-doc/src/cached_readers.rs

+3
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,9 @@ pub fn contributor_spotlight_files() -> Cow<'static, UrlToPageMap> {
673673
/// ### Example
674674
///
675675
/// ```
676+
/// # use rari_doc::cached_readers::wiki_histories;
677+
/// # use rari_types::locale::Locale;
678+
///
676679
/// let wiki_histories = wiki_histories();
677680
/// if let Some(en_us_history) = wiki_histories.get(&Locale::EnUs) {
678681
/// println!("Loaded en-US wiki history: {:?}", en_us_history);

Diff for: crates/rari-doc/src/contributors.rs

+5
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ pub type WikiHistories = HashMap<Locale, WikiHistory>;
3838
/// # Example
3939
///
4040
/// ```rust
41+
/// # use rari_doc::contributors::contributors_txt;
42+
/// # use rari_doc::contributors::WikiHistoryEntry;
43+
///
4144
/// let github_file_url = "https://github.com/user/repo/blob/main/file.txt";
4245
/// let wiki_history = Some(WikiHistoryEntry {
4346
/// contributors: vec!["Alice".to_string(), "Bob".to_string()],
@@ -56,6 +59,8 @@ pub type WikiHistories = HashMap<Locale, WikiHistory>;
5659
/// If no `wiki_history` is provided:
5760
///
5861
/// ```rust
62+
/// # use rari_doc::contributors::contributors_txt;
63+
/// let github_file_url = "https://github.com/user/repo/blob/main/file.txt";
5964
/// let result = contributors_txt(None, github_file_url);
6065
/// println!("{}", result);
6166
/// // Output:

Diff for: crates/rari-doc/src/html/fix_link.rs

+184
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
use std::borrow::Cow;
2+
3+
use lol_html::{html_content::Element, HandlerResult};
4+
use rari_types::{fm_types::PageType, locale::default_locale};
5+
use rari_utils::concat_strs;
6+
7+
use crate::{
8+
helpers::l10n::l10n_json_data,
9+
issues::get_issue_couter,
10+
pages::page::{Page, PageLike},
11+
redirects::resolve_redirect,
12+
resolve::{strip_locale_from_url, url_with_locale},
13+
};
14+
15+
pub fn check_and_fix_link(
16+
el: &mut Element,
17+
page: &impl PageLike,
18+
data_issues: bool,
19+
) -> HandlerResult {
20+
let original_href = el.get_attribute("href").expect("href was required");
21+
22+
if original_href.starts_with('/') || original_href.starts_with("https://developer.mozilla.org")
23+
{
24+
handle_internal_link(&original_href, el, page, data_issues)
25+
} else if original_href.starts_with("http:") || original_href.starts_with("https:") {
26+
handle_extenal_link(el)
27+
} else {
28+
Ok(())
29+
}
30+
}
31+
32+
pub fn handle_extenal_link(el: &mut Element) -> HandlerResult {
33+
let class = el.get_attribute("class").unwrap_or_default();
34+
if !class.split(' ').any(|s| s == "external") {
35+
el.set_attribute(
36+
"class",
37+
&concat_strs!(&class, if class.is_empty() { "" } else { " " }, "external"),
38+
)?;
39+
}
40+
if !el.has_attribute("target") {
41+
el.set_attribute("target", "_blank")?;
42+
}
43+
Ok(())
44+
}
45+
46+
pub fn handle_internal_link(
47+
original_href: &str,
48+
el: &mut Element,
49+
page: &impl PageLike,
50+
data_issues: bool,
51+
) -> HandlerResult {
52+
// Strip prefix for curriculum links.
53+
let original_href = if page.page_type() == PageType::Curriculum {
54+
original_href
55+
.strip_prefix("https://developer.mozilla.org")
56+
.unwrap_or(original_href)
57+
} else {
58+
original_href
59+
};
60+
61+
let href = original_href
62+
.strip_prefix("https://developer.mozilla.org")
63+
.map(|href| if href.is_empty() { "/" } else { href })
64+
.unwrap_or(original_href);
65+
let href_no_hash = &href[..href.find('#').unwrap_or(href.len())];
66+
let (href_locale, _) = strip_locale_from_url(href);
67+
let no_locale = href_locale.is_none();
68+
if no_locale && Page::ignore_link_check(href_no_hash) {
69+
return Ok(());
70+
}
71+
let maybe_prefixed_href = if no_locale {
72+
Cow::Owned(concat_strs!("/", page.locale().as_url_str(), href))
73+
} else {
74+
Cow::Borrowed(href)
75+
};
76+
let mut resolved_href =
77+
resolve_redirect(&maybe_prefixed_href).unwrap_or(Cow::Borrowed(&maybe_prefixed_href));
78+
let mut resolved_href_no_hash =
79+
&resolved_href[..resolved_href.find('#').unwrap_or(resolved_href.len())];
80+
if resolved_href_no_hash == page.url() {
81+
el.set_attribute("aria-current", "page")?;
82+
}
83+
let en_us_fallback = if !Page::exists(resolved_href_no_hash)
84+
&& !Page::ignore_link_check(href)
85+
&& href_locale != Some(default_locale())
86+
{
87+
println!("{resolved_href}");
88+
if let Some(en_us_href) = url_with_locale(&resolved_href, default_locale()) {
89+
resolved_href = resolve_redirect(&en_us_href).unwrap_or(Cow::Owned(en_us_href));
90+
println!("{resolved_href}");
91+
resolved_href_no_hash =
92+
&resolved_href[..resolved_href.find('#').unwrap_or(resolved_href.len())];
93+
}
94+
true
95+
} else {
96+
false
97+
};
98+
99+
let remove_href = if !Page::exists(resolved_href_no_hash) && !Page::ignore_link_check(href) {
100+
tracing::debug!("{resolved_href_no_hash} {href}");
101+
let class = el.get_attribute("class").unwrap_or_default();
102+
el.set_attribute(
103+
"class",
104+
&concat_strs!(
105+
&class,
106+
if class.is_empty() { "" } else { " " },
107+
"page-not-created"
108+
),
109+
)?;
110+
if let Some(href) = el.get_attribute("href") {
111+
el.set_attribute("data-href", &href)?;
112+
}
113+
el.remove_attribute("href");
114+
el.set_attribute("title", l10n_json_data("Common", "summary", page.locale())?)?;
115+
true
116+
} else {
117+
false
118+
};
119+
120+
if !remove_href && en_us_fallback {
121+
let class = el.get_attribute("class").unwrap_or_default();
122+
if !class.split(' ').any(|s| s == "only-in-en-us") {
123+
el.set_attribute(
124+
"class",
125+
&concat_strs!(
126+
&class,
127+
if class.is_empty() { "" } else { " " },
128+
"only-in-en-us"
129+
),
130+
)?;
131+
}
132+
}
133+
134+
let resolved_href = if no_locale {
135+
strip_locale_from_url(&resolved_href).1
136+
} else {
137+
resolved_href.as_ref()
138+
};
139+
if original_href != resolved_href {
140+
if let Some(pos) = el.get_attribute("data-sourcepos") {
141+
if let Some((start, _)) = pos.split_once('-') {
142+
if let Some((line, col)) = start.split_once(':') {
143+
let line = line
144+
.parse::<i64>()
145+
.map(|l| l + i64::try_from(page.fm_offset()).unwrap_or(l - 1))
146+
.ok()
147+
.unwrap_or(-1);
148+
let col = col.parse::<i64>().ok().unwrap_or(0);
149+
let ic = get_issue_couter();
150+
tracing::warn!(
151+
source = "redirected-link",
152+
ic = ic,
153+
line = line,
154+
col = col,
155+
url = original_href,
156+
redirect = resolved_href
157+
);
158+
if data_issues {
159+
el.set_attribute("data-flaw", &ic.to_string())?;
160+
}
161+
}
162+
}
163+
} else {
164+
let ic = get_issue_couter();
165+
tracing::warn!(
166+
source = "redirected-link",
167+
ic = ic,
168+
url = original_href,
169+
redirect = resolved_href
170+
);
171+
if data_issues {
172+
el.set_attribute("data-flaw", &ic.to_string())?;
173+
}
174+
}
175+
176+
if !remove_href {
177+
el.set_attribute("href", resolved_href)?;
178+
}
179+
}
180+
if remove_href {
181+
el.remove_attribute("href");
182+
}
183+
Ok(())
184+
}

Diff for: crates/rari-doc/src/html/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub mod bubble_up;
2+
mod fix_link;
23
pub mod links;
34
pub mod modifier;
45
pub mod rewriter;

Diff for: crates/rari-doc/src/html/modifier.rs

+3
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ pub fn remove_empty_p(html: &mut Html) -> Result<(), DocError> {
199199
/// # Example
200200
///
201201
/// ```rust
202+
/// # use scraper::Html;
203+
/// # use rari_doc::html::modifier::add_missing_ids;
204+
///
202205
/// let mut html = Html::parse_document("<h2>Some Heading</h2>");
203206
/// add_missing_ids(&mut html);
204207
/// ```

Diff for: crates/rari-doc/src/html/rewriter.rs

+3-122
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,10 @@ use tracing::warn;
1212
use url::Url;
1313

1414
use crate::error::DocError;
15-
use crate::helpers::l10n::l10n_json_data;
15+
use crate::html::fix_link::check_and_fix_link;
1616
use crate::issues::get_issue_couter;
17-
use crate::pages::page::{Page, PageLike};
17+
use crate::pages::page::PageLike;
1818
use crate::pages::types::curriculum::CurriculumPage;
19-
use crate::redirects::resolve_redirect;
20-
use crate::resolve::strip_locale_from_url;
2119

2220
pub fn post_process_inline_sidebar(input: &str) -> Result<String, DocError> {
2321
let element_content_handlers = vec![element!("*[data-rewriter=em]", |el| {
@@ -190,124 +188,7 @@ pub fn post_process_html<T: PageLike>(
190188
Ok(())
191189
}),
192190
element!("a[href]", |el| {
193-
let original_href = el.get_attribute("href").expect("href was required");
194-
// Strip prefix for curriculum links.
195-
let original_href = if page.page_type() == PageType::Curriculum {
196-
original_href
197-
.strip_prefix("https://developer.mozilla.org")
198-
.unwrap_or(&original_href)
199-
} else {
200-
&original_href
201-
};
202-
if original_href.starts_with('/')
203-
|| original_href.starts_with("https://developer.mozilla.org")
204-
{
205-
let href = original_href
206-
.strip_prefix("https://developer.mozilla.org")
207-
.map(|href| if href.is_empty() { "/" } else { href })
208-
.unwrap_or(original_href);
209-
let href_no_hash = &href[..href.find('#').unwrap_or(href.len())];
210-
let no_locale = strip_locale_from_url(href).0.is_none();
211-
if no_locale && Page::ignore_link_check(href_no_hash) {
212-
return Ok(());
213-
}
214-
let maybe_prefixed_href = if no_locale {
215-
Cow::Owned(concat_strs!("/", page.locale().as_url_str(), href))
216-
} else {
217-
Cow::Borrowed(href)
218-
};
219-
let resolved_href = resolve_redirect(&maybe_prefixed_href)
220-
.unwrap_or(Cow::Borrowed(&maybe_prefixed_href));
221-
let resolved_href_no_hash =
222-
&resolved_href[..resolved_href.find('#').unwrap_or(resolved_href.len())];
223-
if resolved_href_no_hash == page.url() {
224-
el.set_attribute("aria-current", "page")?;
225-
}
226-
let remove_href = if !Page::exists(resolved_href_no_hash)
227-
&& !Page::ignore_link_check(href)
228-
{
229-
tracing::debug!("{resolved_href_no_hash} {href}");
230-
let class = el.get_attribute("class").unwrap_or_default();
231-
el.set_attribute(
232-
"class",
233-
&concat_strs!(
234-
&class,
235-
if class.is_empty() { "" } else { " " },
236-
"page-not-created"
237-
),
238-
)?;
239-
if let Some(href) = el.get_attribute("href") {
240-
el.set_attribute("data-href", &href)?;
241-
}
242-
el.remove_attribute("href");
243-
el.set_attribute("title", l10n_json_data("Common", "summary", page.locale())?)?;
244-
true
245-
} else {
246-
false
247-
};
248-
let resolved_href = if no_locale {
249-
strip_locale_from_url(&resolved_href).1
250-
} else {
251-
resolved_href.as_ref()
252-
};
253-
if original_href != resolved_href {
254-
if let Some(pos) = el.get_attribute("data-sourcepos") {
255-
if let Some((start, _)) = pos.split_once('-') {
256-
if let Some((line, col)) = start.split_once(':') {
257-
let line = line
258-
.parse::<i64>()
259-
.map(|l| l + i64::try_from(page.fm_offset()).unwrap_or(l - 1))
260-
.ok()
261-
.unwrap_or(-1);
262-
let col = col.parse::<i64>().ok().unwrap_or(0);
263-
let ic = get_issue_couter();
264-
tracing::warn!(
265-
source = "redirected-link",
266-
ic = ic,
267-
line = line,
268-
col = col,
269-
url = original_href,
270-
redirect = resolved_href
271-
);
272-
if data_issues {
273-
el.set_attribute("data-flaw", &ic.to_string())?;
274-
}
275-
}
276-
}
277-
} else {
278-
let ic = get_issue_couter();
279-
tracing::warn!(
280-
source = "redirected-link",
281-
ic = ic,
282-
url = original_href,
283-
redirect = resolved_href
284-
);
285-
if data_issues {
286-
el.set_attribute("data-flaw", &ic.to_string())?;
287-
}
288-
}
289-
290-
if !remove_href {
291-
el.set_attribute("href", resolved_href)?;
292-
}
293-
}
294-
if remove_href {
295-
el.remove_attribute("href");
296-
}
297-
} else if original_href.starts_with("http:") || original_href.starts_with("https:") {
298-
let class = el.get_attribute("class").unwrap_or_default();
299-
if !class.split(' ').any(|s| s == "external") {
300-
el.set_attribute(
301-
"class",
302-
&concat_strs!(&class, if class.is_empty() { "" } else { " " }, "external"),
303-
)?;
304-
}
305-
if !el.has_attribute("target") {
306-
el.set_attribute("target", "_blank")?;
307-
}
308-
}
309-
310-
Ok(())
191+
check_and_fix_link(el, page, data_issues)
311192
}),
312193
element!("pre:not(.notranslate)", |el| {
313194
let mut class = el.get_attribute("class").unwrap_or_default();

Diff for: crates/rari-doc/src/redirects.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ where
105105
/// * `Option<Cow<'_, str>>` - Returns `Some(Cow::Borrowed(target_url))` if a redirect is found and the target URL
106106
/// does not contain a hash fragment, or `Some(Cow::Owned(format!("{target_url}{hash}")))` if the target URL
107107
/// contains a hash fragment or the original URL has a hash fragment. Returns `None` if no redirect is found.
108-
pub(crate) fn resolve_redirect(url: &str) -> Option<Cow<'_, str>> {
108+
pub(crate) fn resolve_redirect<'a>(url: impl AsRef<str>) -> Option<Cow<'a, str>> {
109+
let url = url.as_ref();
109110
let hash_index = url.find('#').unwrap_or(url.len());
110111
let (url_no_hash, hash) = (&url[..hash_index], &url[hash_index..]);
111112
match (

0 commit comments

Comments
 (0)