Skip to content

Commit fae9ba2

Browse files
committed
feat(redirects): short cuts
1 parent a353b88 commit fae9ba2

File tree

4 files changed

+185
-0
lines changed

4 files changed

+185
-0
lines changed

Diff for: Cargo.lock

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: crates/rari-tools/Cargo.toml

+5
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ serde = { version = "1", features = ["derive"] }
1414
serde_json = { version = "1", features = ["preserve_order"] }
1515
csv = "1"
1616
chrono = { version = "0.4", features = ["serde"] }
17+
tracing = "0.1"
18+
concat-in-place = "1"
1719

1820
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
1921
reqwest = { version = "0.12", default-features = false, features = [
@@ -28,3 +30,6 @@ reqwest = { version = "0.12", default-features = false, features = [
2830
"json",
2931
"gzip",
3032
] }
33+
34+
[dev-dependencies]
35+
rari-types = { path = "../rari-types", features = ["testing"] }

Diff for: crates/rari-tools/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
pub mod history;
22
pub mod popularities;
3+
pub mod redirects;

Diff for: crates/rari-tools/src/redirects.rs

+177
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
use std::borrow::Cow;
2+
use std::cmp::Ordering;
3+
use std::collections::HashMap;
4+
5+
use concat_in_place::strcat;
6+
use rari_types::globals::deny_warnings;
7+
use thiserror::Error;
8+
9+
#[derive(Debug, Clone, Error)]
10+
pub enum RedirectError {
11+
#[error("RedirectError: {0}")]
12+
Cycle(String),
13+
#[error("No cased version {0}")]
14+
NoCased(String),
15+
}
16+
17+
// Transitive directed acyclic graph of all redirects.
18+
// All redirects are expanded A -> B, B -> C becomes:
19+
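// A -> C, B -> C (every source points straight at its final target); redirects that run into a cycle are dropped, or reported as an error when warnings are denied.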
20+
fn transit<'a>(
21+
s: &'a str,
22+
froms: &mut Vec<&'a str>,
23+
dag: &'a HashMap<&'a str, &'a str>,
24+
) -> Result<Option<String>, RedirectError> {
25+
let next = dag.get(s);
26+
if let Some(next) = next {
27+
froms.push(s);
28+
if froms.iter().any(|from| from == next) {
29+
let msg = format!("redirect cycle [{}] → {next}", froms.join(", "));
30+
if deny_warnings() {
31+
return Err(RedirectError::Cycle(msg));
32+
} else {
33+
tracing::warn!("{msg}")
34+
}
35+
return Ok(None);
36+
}
37+
return transit(next, froms, dag);
38+
}
39+
Ok(Some(s.to_string()))
40+
}
41+
42+
pub fn short_cuts<'a>(
43+
pairs: &'a [(&'a str, &'a str)],
44+
) -> Result<Vec<(String, String)>, RedirectError> {
45+
let mut casing = pairs
46+
.iter()
47+
.flat_map(|(from, to)| {
48+
[
49+
(from.to_lowercase(), Cow::Borrowed(*from)),
50+
(to.to_lowercase(), Cow::Borrowed(*to)),
51+
]
52+
})
53+
.collect::<HashMap<String, Cow<'_, str>>>();
54+
55+
let lowercase_pairs: Vec<_> = pairs
56+
.iter()
57+
.map(|(from, to)| (from.to_lowercase(), to.to_lowercase()))
58+
.collect();
59+
60+
let dag = lowercase_pairs
61+
.iter()
62+
.map(|(from, to)| (from.as_str(), to.as_str()))
63+
.collect();
64+
65+
let mut transitive_dag = HashMap::new();
66+
67+
for (from, _) in lowercase_pairs.iter() {
68+
let mut froms = vec![];
69+
let to = transit(from, &mut froms, &dag)?;
70+
if let Some(to) = to {
71+
for from in froms {
72+
transitive_dag.insert(from.to_string(), to.clone());
73+
}
74+
}
75+
}
76+
77+
// We want to shortcut
78+
// /en-US/docs/foo/bar /en-US/docs/foo#bar
79+
// /en-US/docs/foo /en-US/docs/Web/something
80+
// to
81+
// /en-US/docs/foo/bar /en-US/docs/something#bar
82+
// /en-US/docs/foo /en-US/docs/Web/something
83+
for (from, to) in pairs {
84+
if let Some((bare_to, hash)) = to.split_once('#') {
85+
let bare_to_lc = bare_to.to_lowercase();
86+
if let Some(redirected_to) = transitive_dag.get(&bare_to_lc) {
87+
let new_to = strcat!(redirected_to "#" hash.to_lowercase().as_str());
88+
let redirected_to_cased = casing
89+
.get(redirected_to.as_str())
90+
.ok_or(RedirectError::NoCased(redirected_to.clone()))?;
91+
let new_to_cased = Cow::Owned(strcat!(redirected_to_cased "#" hash));
92+
casing.insert(new_to.to_string(), new_to_cased);
93+
tracing::info!("Short cutting hashed redirect: {from} -> {new_to}");
94+
transitive_dag.insert(from.to_lowercase(), new_to);
95+
}
96+
}
97+
}
98+
99+
// Collect and restore cases!
100+
let mut transitive_pairs: Vec<(String, String)> = transitive_dag
101+
.into_iter()
102+
.map(|(from, to)| {
103+
(
104+
casing
105+
.get(from.as_str())
106+
.map(|s| s.to_string())
107+
.unwrap_or(from),
108+
casing.get(to.as_str()).map(|s| s.to_string()).unwrap_or(to),
109+
)
110+
})
111+
.collect();
112+
transitive_pairs.sort_by(|(a_from, a_to), (b_from, b_to)| match a_from.cmp(b_from) {
113+
Ordering::Equal => a_to.cmp(b_to),
114+
x => x,
115+
});
116+
Ok(transitive_pairs)
117+
}
118+
119+
// Unit tests for `short_cuts`. A single test module is enough; the previous
// `mod test { mod tests { .. } }` nesting added nothing.
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts borrowed pairs into the owned form `short_cuts` returns.
    fn owned(pairs: &[(&str, &str)]) -> Vec<(String, String)> {
        pairs
            .iter()
            .map(|(from, to)| (from.to_string(), to.to_string()))
            .collect()
    }

    // A -> B, B -> C: both sources end up pointing at C.
    #[test]
    fn simple_chain() {
        let pairs = [
            ("/en-US/docs/A", "/en-US/docs/B"),
            ("/en-US/docs/B", "/en-US/docs/C"),
        ];
        let result = short_cuts(&pairs).unwrap();
        let expected = owned(&[
            ("/en-US/docs/A", "/en-US/docs/C"),
            ("/en-US/docs/B", "/en-US/docs/C"),
        ]);
        assert_eq!(result, expected);
    }

    // Self-redirects are expected to yield no output.
    #[test]
    fn a_equals_a() {
        let pairs = [
            ("/en-US/docs/A", "/en-US/docs/A"),
            ("/en-US/docs/B", "/en-US/docs/B"),
        ];
        let result = short_cuts(&pairs).unwrap();
        let expected: Vec<(String, String)> = vec![]; // empty result as expected
        assert_eq!(result, expected);
    }

    // A -> B, B -> A: both redirects are expected to be dropped.
    #[test]
    fn simple_cycle() {
        let pairs = [
            ("/en-US/docs/A", "/en-US/docs/B"),
            ("/en-US/docs/B", "/en-US/docs/A"),
        ];
        let result = short_cuts(&pairs).unwrap();
        let expected: Vec<(String, String)> = vec![]; // empty result due to cycle
        assert_eq!(result, expected);
    }

    // A -> B#Foo, B -> C: the hash is carried over to the final target and
    // its original casing is preserved.
    #[test]
    fn hashes() {
        let pairs = [
            ("/en-US/docs/A", "/en-US/docs/B#Foo"),
            ("/en-US/docs/B", "/en-US/docs/C"),
        ];
        let result = short_cuts(&pairs).unwrap();
        let expected = owned(&[
            ("/en-US/docs/A", "/en-US/docs/C#Foo"),
            ("/en-US/docs/B", "/en-US/docs/C"),
        ]);
        assert_eq!(result, expected);
    }
}

0 commit comments

Comments
 (0)