Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Greatly improve rustdoc xpath checks #89676

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 15 additions & 96 deletions src/etc/htmldocck.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,72 +110,9 @@
import re
import shlex
from collections import namedtuple
try:
from html.parser import HTMLParser
except ImportError:
from HTMLParser import HTMLParser
try:
from xml.etree import cElementTree as ET
except ImportError:
from xml.etree import ElementTree as ET

try:
from html.entities import name2codepoint
except ImportError:
from htmlentitydefs import name2codepoint

# "void elements" (no closing tag) from the HTML Standard section 12.1.2
VOID_ELEMENTS = {'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen',
'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr'}

# Python 2 -> 3 compatibility
try:
unichr
except NameError:
unichr = chr

from parsel import Selector

channel = os.environ["DOC_RUST_LANG_ORG_CHANNEL"]

class CustomHTMLParser(HTMLParser):
"""simplified HTML parser.

this is possible because we are dealing with very regular HTML from
rustdoc; we only have to deal with i) void elements and ii) empty
attributes."""
def __init__(self, target=None):
HTMLParser.__init__(self)
self.__builder = target or ET.TreeBuilder()

def handle_starttag(self, tag, attrs):
attrs = {k: v or '' for k, v in attrs}
self.__builder.start(tag, attrs)
if tag in VOID_ELEMENTS:
self.__builder.end(tag)

def handle_endtag(self, tag):
self.__builder.end(tag)

def handle_startendtag(self, tag, attrs):
attrs = {k: v or '' for k, v in attrs}
self.__builder.start(tag, attrs)
self.__builder.end(tag)

def handle_data(self, data):
self.__builder.data(data)

def handle_entityref(self, name):
self.__builder.data(unichr(name2codepoint[name]))

def handle_charref(self, name):
code = int(name[1:], 16) if name.startswith(('x', 'X')) else int(name, 10)
self.__builder.data(unichr(code))

def close(self):
HTMLParser.close(self)
return self.__builder.close()


Command = namedtuple('Command', 'negated cmd args lineno context')


Expand Down Expand Up @@ -256,29 +193,11 @@ def get_commands(template):
yield Command(negated=negated, cmd=cmd, args=args, lineno=lineno+1, context=line)


def _flatten(node, acc):
if node.text:
acc.append(node.text)
for e in node:
_flatten(e, acc)
if e.tail:
acc.append(e.tail)


def flatten(node):
acc = []
_flatten(node, acc)
return ''.join(acc)


def normalize_xpath(path):
path = path.replace("{{channel}}", channel)
if path.startswith('//'):
return '.' + path # avoid warnings
elif path.startswith('.//'):
return path
else:
if not path.startswith('//'):
raise InvalidCheck('Non-absolute XPath is not supported due to implementation issues')
return path


class CachedFiles(object):
Expand Down Expand Up @@ -323,7 +242,7 @@ def get_tree(self, path):

with io.open(abspath, encoding='utf-8') as f:
try:
tree = ET.fromstringlist(f.readlines(), CustomHTMLParser())
tree = Selector(text=f.read())
except Exception as e:
raise RuntimeError('Cannot parse an HTML file {!r}: {}'.format(path, e))
self.trees[path] = tree
Expand Down Expand Up @@ -351,7 +270,7 @@ def check_string(data, pat, regexp):
def check_tree_attr(tree, path, attr, pat, regexp):
path = normalize_xpath(path)
ret = False
for e in tree.findall(path):
for e in tree.xpath(path):
if attr in e.attrib:
value = e.attrib[attr]
else:
Expand All @@ -363,19 +282,19 @@ def check_tree_attr(tree, path, attr, pat, regexp):
return ret


def flatten(elem):
return ''.join(elem.css('::text').getall())


def check_tree_text(tree, path, pat, regexp):
path = normalize_xpath(path)
ret = False
try:
for e in tree.findall(path):
try:
value = flatten(e)
except KeyError:
continue
else:
ret = check_string(value, pat, regexp)
if ret:
break
for e in tree.xpath(path):
value = flatten(e)
ret = check_string(value, pat, regexp)
if ret:
break
except Exception:
print('Failed to get path "{}"'.format(path))
raise
Expand All @@ -384,7 +303,7 @@ def check_tree_text(tree, path, pat, regexp):

def get_tree_count(tree, path):
path = normalize_xpath(path)
return len(tree.findall(path))
return len(tree.xpath(path))


def stderr(*args):
Expand Down
7 changes: 4 additions & 3 deletions src/test/rustdoc/fn-type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub struct Foo<'a, T> {
pub hrtb_lifetime: for<'b, 'c> fn(one: &'b i32, two: &'c &'b i32) -> (&'b i32, &'c i32),
}

// @has 'foo/struct.Foo.html' '//span[@id="structfield.generic"]' "generic: fn(val: &T) -> T"
// @has 'foo/struct.Foo.html' '//span[@id="structfield.lifetime"]' "lifetime: fn(val: &'a i32) -> i32"
// @has 'foo/struct.Foo.html' '//span[@id="structfield.hrtb_lifetime"]' "hrtb_lifetime: for<'b, 'c> fn(one: &'b i32, two: &'c &'b i32) -> (&'b i32, &'c i32)"
// @has 'foo/struct.Foo.html'
// @has - '//span[@id="structfield.generic"]' "generic: fn(val: &T) -> T"
// @has - '//span[@id="structfield.lifetime"]' "lifetime: fn(val: &'a i32) -> i32"
// @has - '//span[@id="structfield.hrtb_lifetime"]' "hrtb_lifetime: for<'b, 'c> fn(one: &'b i32, two: &'c &'b i32) -> (&'b i32, &'c i32)"
8 changes: 4 additions & 4 deletions src/test/rustdoc/inline_cross/renamed-via-module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@
extern crate foo;

// @has foo/iter/index.html
// @has - '//a/[@href="struct.DeprecatedStepBy.html"]' "DeprecatedStepBy"
// @has - '//a/[@href="struct.StepBy.html"]' "StepBy"
// @has - '//a[@href="struct.DeprecatedStepBy.html"]' "DeprecatedStepBy"
// @has - '//a[@href="struct.StepBy.html"]' "StepBy"
// @has foo/iter/struct.DeprecatedStepBy.html
// @has - '//h1' "Struct foo::iter::DeprecatedStepBy"
// @has foo/iter/struct.StepBy.html
// @has - '//h1' "Struct foo::iter::StepBy"

// @has bar/iter/index.html
// @has - '//a/[@href="struct.DeprecatedStepBy.html"]' "DeprecatedStepBy"
// @has - '//a/[@href="struct.StepBy.html"]' "StepBy"
// @has - '//a[@href="struct.DeprecatedStepBy.html"]' "DeprecatedStepBy"
// @has - '//a[@href="struct.StepBy.html"]' "StepBy"
// @has bar/iter/struct.DeprecatedStepBy.html
// @has - '//h1' "Struct bar::iter::DeprecatedStepBy"
// @has bar/iter/struct.StepBy.html
Expand Down
6 changes: 3 additions & 3 deletions src/test/rustdoc/intra-doc/private.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
// make sure to update `rustdoc-ui/intra-doc/private.rs` if you update this file

/// docs [DontDocMe] [DontDocMe::f] [DontDocMe::x]
// @has private/struct.DocMe.html '//*a[@href="struct.DontDocMe.html"]' 'DontDocMe'
// @has private/struct.DocMe.html '//*a[@href="struct.DontDocMe.html#method.f"]' 'DontDocMe::f'
// @has private/struct.DocMe.html '//*a[@href="struct.DontDocMe.html#structfield.x"]' 'DontDocMe::x'
// @has private/struct.DocMe.html '//a[@href="struct.DontDocMe.html"]' 'DontDocMe'
// @has private/struct.DocMe.html '//a[@href="struct.DontDocMe.html#method.f"]' 'DontDocMe::f'
// @has private/struct.DocMe.html '//a[@href="struct.DontDocMe.html#structfield.x"]' 'DontDocMe::x'
pub struct DocMe;
struct DontDocMe {
x: usize,
Expand Down
4 changes: 2 additions & 2 deletions src/test/rustdoc/primitive/no_std.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#![deny(warnings)]
#![deny(rustdoc::broken_intra_doc_links)]

// @has no_std/fn.foo.html '//a/[@href="{{channel}}/core/primitive.u8.html"]' 'u8'
// @has no_std/fn.foo.html '//a/[@href="{{channel}}/core/primitive.u8.html"]' 'primitive link'
// @has no_std/fn.foo.html '//a[@href="{{channel}}/core/primitive.u8.html"]' 'u8'
// @has no_std/fn.foo.html '//a[@href="{{channel}}/core/primitive.u8.html"]' 'primitive link'
/// Link to [primitive link][u8]
pub fn foo() -> u8 {}

Expand Down
2 changes: 1 addition & 1 deletion src/test/rustdoc/proc-macro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#![crate_name="some_macros"]

// @has some_macros/index.html
// @has - '//a/[@href="attr.some_proc_attr.html"]' 'some_proc_attr'
// @has - '//a[@href="attr.some_proc_attr.html"]' 'some_proc_attr'

//! include a link to [some_proc_macro] to make sure it works.

Expand Down
6 changes: 3 additions & 3 deletions src/test/rustdoc/raw-ident-eliminate-r-hashtag.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ pub mod internal {
///
/// [name]: mod
/// [other name]: crate::internal::mod
// @has 'raw_ident_eliminate_r_hashtag/internal/struct.B.html' '//*a[@href="struct.mod.html"]' 'name'
// @has 'raw_ident_eliminate_r_hashtag/internal/struct.B.html' '//*a[@href="struct.mod.html"]' 'other name'
// @has 'raw_ident_eliminate_r_hashtag/internal/struct.B.html' '//a[@href="struct.mod.html"]' 'name'
// @has 'raw_ident_eliminate_r_hashtag/internal/struct.B.html' '//a[@href="struct.mod.html"]' 'other name'
pub struct B;
}

/// See [name].
///
/// [name]: internal::mod
// @has 'raw_ident_eliminate_r_hashtag/struct.A.html' '//*a[@href="internal/struct.mod.html"]' 'name'
// @has 'raw_ident_eliminate_r_hashtag/struct.A.html' '//a[@href="internal/struct.mod.html"]' 'name'
pub struct A;