Skip to content

Commit af66036

Browse files
Add new tidy check to ensure that rustdoc DOM IDs are all declared as expected
1 parent 1c4e49a commit af66036

File tree

3 files changed

+170
-0
lines changed

3 files changed

+170
-0
lines changed

src/tools/tidy/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ pub mod extdeps;
4747
pub mod features;
4848
pub mod pal;
4949
pub mod primitive_docs;
50+
pub mod rustdoc_html_ids;
5051
pub mod style;
5152
pub mod target_specific_tests;
5253
pub mod ui_tests;

src/tools/tidy/src/main.rs

+3
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ fn main() {
6969
check!(errors, &compiler_path);
7070
check!(error_codes_check, &[&src_path, &compiler_path]);
7171

72+
// Checks for rustdoc.
73+
check!(rustdoc_html_ids, &src_path);
74+
7275
// Checks that only make sense for the std libs.
7376
check!(pal, &library_path);
7477
check!(primitive_docs, &library_path);
+166
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
//! Checks that the rustdoc ID map is up-to-date. The goal here is to check a few things:
2+
//!
3+
//! * All IDs created by rustdoc (through JS or file generation) are declared in the ID map.
4+
//! * There are no unused IDs.
5+
6+
use std::collections::HashMap;
7+
use std::ffi::OsStr;
8+
use std::fs::File;
9+
use std::io::{BufRead, BufReader};
10+
use std::path::Path;
11+
12+
use regex::Regex;
13+
14+
/// Location of the file holding the `html_id_map!(...)` declaration. `check` joins it onto the
/// root path it receives, and it is also the file name printed in error messages.
const ID_MAP_PATH: &str = "librustdoc/html/markdown.rs";
15+
/// IDs that must be declared in the ID map but must NOT be found in the scanned `.rs`/`.html`
/// files: `check` reports an error if one of them is seen there.
const IDS_USED_IN_JS: &[&str] = &[
16+
// This one is created in the JS and therefore cannot be found in rust files.
17+
"help",
18+
// This one is used when we need to use a "default" ID.
19+
"deref-methods",
20+
];
21+
22+
fn extract_ids(path: &Path, bad: &mut bool) -> HashMap<String, usize> {
23+
let file = File::open(path).expect("failed to open file to extract rustdoc IDs");
24+
let buf_reader = BufReader::new(file);
25+
let mut iter = buf_reader.lines();
26+
let mut ids = HashMap::new();
27+
28+
while let Some(Ok(line)) = iter.next() {
29+
if line.trim_start().starts_with("html_id_map!(") {
30+
break;
31+
}
32+
}
33+
// We're now in the function body, time to retrieve the IDs!
34+
while let Some(Ok(line)) = iter.next() {
35+
let line = line.trim_start();
36+
if line.starts_with("// ") {
37+
// It's a comment, ignoring this line...
38+
continue;
39+
} else if line.starts_with(")") {
40+
// We reached the end of the IDs declaration list.
41+
break;
42+
}
43+
let id = line.split('"').skip(1).next().unwrap();
44+
if ids.insert(id.to_owned(), 0).is_some() {
45+
eprintln!(
46+
"=> ID `{}` is defined more than once in the ID map in file `{}`",
47+
id, ID_MAP_PATH
48+
);
49+
*bad = true;
50+
}
51+
}
52+
if ids.is_empty() {
53+
eprintln!("=> No IDs were found in rustdoc in file `{}`...", ID_MAP_PATH);
54+
*bad = true;
55+
}
56+
ids
57+
}
58+
59+
/// Records one use of `id`, found in file `path` at zero-based line index `line_nb`.
///
/// * IDs containing `{` are format-string templates and are ignored.
/// * If `id` is a key of `ids`, its usage counter is incremented.
/// * Otherwise an error is printed on stderr (with a one-based line number) and `*bad` is set
///   to `true`.
fn check_id(
    path: &Path,
    id: &str,
    ids: &mut HashMap<String, usize>,
    line_nb: usize,
    bad: &mut bool,
) {
    if id.contains('{') {
        // This is a formatted ID, no need to check it!
        return;
    }
    // `HashMap<String, _>` can be queried with a `&str` directly: no allocation needed.
    match ids.get_mut(id) {
        Some(nb) => *nb += 1,
        None => {
            eprintln!(
                "=> ID `{}` in file `{}` at line {} is missing from `init_id_map`",
                id,
                path.display(),
                line_nb + 1,
            );
            *bad = true;
        }
    }
}
84+
85+
fn check_ids(
86+
path: &Path,
87+
f: &str,
88+
ids: &mut HashMap<String, usize>,
89+
regex: &Regex,
90+
bad: &mut bool,
91+
small_section_header_checked: &mut usize,
92+
) {
93+
let mut is_checking_small_section_header = None;
94+
95+
for (line_nb, line) in f.lines().enumerate() {
96+
let trimmed = line.trim_start();
97+
// We're not interested in comments or doc comments.
98+
if trimmed.starts_with("//") {
99+
continue;
100+
} else if let Some(start_line) = is_checking_small_section_header {
101+
if line_nb == start_line + 2 {
102+
check_id(path, trimmed.split('"').skip(1).next().unwrap(), ids, line_nb, bad);
103+
is_checking_small_section_header = None;
104+
}
105+
} else if trimmed.starts_with("write_small_section_header(") {
106+
// This function is used to create section: the second argument of the function is an
107+
// ID and we need to check it as well, hence this specific check...
108+
if trimmed.contains(',') {
109+
// This is a call made on one line, so we can simply check it!
110+
check_id(path, trimmed.split('"').skip(1).next().unwrap(), ids, line_nb, bad);
111+
} else {
112+
is_checking_small_section_header = Some(line_nb);
113+
}
114+
*small_section_header_checked += 1;
115+
continue;
116+
}
117+
for cap in regex.captures_iter(line) {
118+
check_id(path, &cap[1], ids, line_nb, bad);
119+
}
120+
}
121+
}
122+
123+
pub fn check(path: &Path, bad: &mut bool) {
124+
// matches ` id="blabla"`
125+
let regex = Regex::new(r#"[\s"]id=\\?["']([^\s\\]+)\\?["'][\s\\>"{]"#).unwrap();
126+
127+
println!("Checking rustdoc IDs...");
128+
let mut ids = extract_ids(&path.join(ID_MAP_PATH), bad);
129+
let mut small_section_header_checked = 0;
130+
if *bad {
131+
return;
132+
}
133+
super::walk(
134+
&path.join("librustdoc/html"),
135+
&mut |path| super::filter_dirs(path),
136+
&mut |entry, contents| {
137+
let path = entry.path();
138+
let file_name = entry.file_name();
139+
if path.extension() == Some(OsStr::new("html"))
140+
|| (path.extension() == Some(OsStr::new("rs")) && file_name != "tests.rs")
141+
{
142+
check_ids(path, contents, &mut ids, &regex, bad, &mut small_section_header_checked);
143+
}
144+
},
145+
);
146+
if small_section_header_checked == 0 {
147+
eprintln!(
148+
"No call to the `write_small_section_header` function was found. Was it renamed?",
149+
);
150+
*bad = true;
151+
}
152+
for (id, nb) in ids {
153+
if IDS_USED_IN_JS.iter().any(|i| i == &id) {
154+
if nb != 0 {
155+
eprintln!("=> ID `{}` is not supposed to be used in Rust code but in the JS!", id);
156+
*bad = true;
157+
}
158+
} else if nb == 0 {
159+
eprintln!(
160+
"=> ID `{}` is unused, it should be removed from `init_id_map` in file `{}`",
161+
id, ID_MAP_PATH
162+
);
163+
*bad = true;
164+
}
165+
}
166+
}

0 commit comments

Comments
 (0)