Skip to content

Commit

Permalink
feat(parser/html): correctly parse void elements
Browse files Browse the repository at this point in the history
  • Loading branch information
dyc3 committed Sep 11, 2024
1 parent 7ffc53f commit 39f7456
Show file tree
Hide file tree
Showing 17 changed files with 778 additions and 23 deletions.
45 changes: 33 additions & 12 deletions crates/biome_html_factory/src/generated/node_factory.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 32 additions & 2 deletions crates/biome_html_parser/src/syntax/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ use biome_parser::Parser;

const RECOVER_ATTRIBUTE_LIST: TokenSet<HtmlSyntaxKind> = token_set!(T![>], T![<], T![/]);

/// Names of the HTML void elements.
///
/// These elements are effectively always self-closing. They should not have a closing tag
/// (if they do, it should be a parsing error), and they might not contain a `/` like in
/// `<img />`.
static VOID_ELEMENTS: &[&str] = &[
    "area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "source", "track",
    "wbr",
];

pub(crate) fn parse_root(p: &mut HtmlParser) {
let m = p.start();

Expand Down Expand Up @@ -54,6 +60,8 @@ fn parse_element(p: &mut HtmlParser) -> ParsedSyntax {
let m = p.start();

p.bump(T![<]);
let opening_tag_name = p.cur_text().to_string();
let should_be_self_closing = VOID_ELEMENTS.contains(&opening_tag_name.as_str());
parse_literal(p).or_add_diagnostic(p, expected_element_name);

AttributeList.parse_list(p);
Expand All @@ -63,10 +71,28 @@ fn parse_element(p: &mut HtmlParser) -> ParsedSyntax {
p.expect(T![>]);
Present(m.complete(p, HTML_SELF_CLOSING_ELEMENT))
} else {
if should_be_self_closing {
if p.at(T![/]) {
p.bump(T![/]);
}
p.expect(T![>]);
return Present(m.complete(p, HTML_SELF_CLOSING_ELEMENT));
}
p.expect_with_context(T![>], HtmlLexContext::ElementList);
let opening = m.complete(p, HTML_OPENING_ELEMENT);
ElementList.parse_list(p);
parse_closing_element(p).or_add_diagnostic(p, expected_closing_tag);
loop {
ElementList.parse_list(p);
if let Some(mut closing) =
parse_closing_element(p).or_add_diagnostic(p, expected_closing_tag)
{
if !closing.text(p).contains(opening_tag_name.as_str()) {
p.error(expected_matching_closing_tag(p, closing.range(p)).into_diagnostic(p));
closing.change_to_bogus(p);
continue;
}
}
break;
}
let previous = opening.precede(p);

Present(previous.complete(p, HTML_ELEMENT))
Expand All @@ -80,6 +106,10 @@ fn parse_closing_element(p: &mut HtmlParser) -> ParsedSyntax {
let m = p.start();
p.bump(T![<]);
p.bump(T![/]);
let should_be_self_closing = VOID_ELEMENTS.contains(&p.cur_text());
if should_be_self_closing {
p.error(void_element_should_not_have_closing_tag(p, p.cur_range()).into_diagnostic(p));
}
let _name = parse_literal(p);
p.bump(T![>]);
Present(m.complete(p, HTML_CLOSING_ELEMENT))
Expand Down
17 changes: 17 additions & 0 deletions crates/biome_html_parser/src/syntax/parse_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ pub(crate) fn expected_closing_tag(p: &HtmlParser, range: TextRange) -> ParseDia
expected_node("closing tag", range, p).into_diagnostic(p)
}

/// Builds the diagnostic reported when the tag found at `range` is not the
/// "matching closing tag" the parser expected.
pub(crate) fn expected_matching_closing_tag(p: &HtmlParser, range: TextRange) -> ParseDiagnostic {
    let builder = expected_node("matching closing tag", range, p);
    builder.into_diagnostic(p)
}

/// The parser encountered a tag that does not have a name.
///
/// ```html
Expand All @@ -39,3 +43,16 @@ pub(crate) fn expected_closing_tag(p: &HtmlParser, range: TextRange) -> ParseDia
pub(crate) fn expected_element_name(p: &HtmlParser, range: TextRange) -> ParseDiagnostic {
    // Build the "expected element name" report for `range`, then convert it into a diagnostic.
    let builder = expected_node("element name", range, p);
    builder.into_diagnostic(p)
}

/// Diagnostic for a closing tag written for a void element, which must not have one.
///
/// ```html
/// <img></img>
///      ^^^^^^ should not have a closing tag
/// ```
///
/// `range` is the span the diagnostic points at; the parser argument is currently unused.
pub(crate) fn void_element_should_not_have_closing_tag(
    _p: &HtmlParser,
    range: TextRange,
) -> ParseDiagnostic {
    let message = "Void elements should not have a closing tag.";
    ParseDiagnostic::new(message, range)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<span>foo<br>This text is inside br.</br>bar</span>
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
---
source: crates/biome_html_parser/tests/spec_test.rs
expression: snapshot
---
## Input

```html
<span>foo<br>This text is inside br.</br>bar</span>
```


## AST

```
HtmlRoot {
bom_token: missing (optional),
directive: missing (optional),
html: HtmlBogusElement {
items: [
HtmlOpeningElement {
l_angle_token: L_ANGLE@0..1 "<" [] [],
name: HtmlName {
value_token: HTML_LITERAL@1..5 "span" [] [],
},
attributes: HtmlAttributeList [],
r_angle_token: R_ANGLE@5..6 ">" [] [],
},
HtmlElementList [
HtmlContent {
value_token: HTML_LITERAL@6..9 "foo" [] [],
},
HtmlSelfClosingElement {
l_angle_token: L_ANGLE@9..10 "<" [] [],
name: HtmlName {
value_token: HTML_LITERAL@10..12 "br" [] [],
},
attributes: HtmlAttributeList [],
slash_token: missing (optional),
r_angle_token: R_ANGLE@12..13 ">" [] [],
},
HtmlContent {
value_token: HTML_LITERAL@13..18 "This" [] [Whitespace(" ")],
},
HtmlContent {
value_token: HTML_LITERAL@18..23 "text" [] [Whitespace(" ")],
},
HtmlContent {
value_token: HTML_LITERAL@23..26 "is" [] [Whitespace(" ")],
},
HtmlContent {
value_token: HTML_LITERAL@26..33 "inside" [] [Whitespace(" ")],
},
HtmlContent {
value_token: HTML_LITERAL@33..36 "br." [] [],
},
],
HtmlBogusElement {
items: [
L_ANGLE@36..37 "<" [] [],
SLASH@37..38 "/" [] [],
HtmlName {
value_token: HTML_LITERAL@38..40 "br" [] [],
},
R_ANGLE@40..41 ">" [] [],
],
},
HtmlElementList [
HtmlContent {
value_token: HTML_LITERAL@41..44 "bar" [] [],
},
],
HtmlClosingElement {
l_angle_token: L_ANGLE@44..45 "<" [] [],
slash_token: SLASH@45..46 "/" [] [],
name: HtmlName {
value_token: HTML_LITERAL@46..50 "span" [] [],
},
r_angle_token: R_ANGLE@50..51 ">" [] [],
},
],
},
eof_token: EOF@51..52 "" [Newline("\n")] [],
}
```

## CST

```
0: HTML_ROOT@0..52
0: (empty)
1: (empty)
2: HTML_BOGUS_ELEMENT@0..51
0: HTML_OPENING_ELEMENT@0..6
0: L_ANGLE@0..1 "<" [] []
1: HTML_NAME@1..5
0: HTML_LITERAL@1..5 "span" [] []
2: HTML_ATTRIBUTE_LIST@5..5
3: R_ANGLE@5..6 ">" [] []
1: HTML_ELEMENT_LIST@6..36
0: HTML_CONTENT@6..9
0: HTML_LITERAL@6..9 "foo" [] []
1: HTML_SELF_CLOSING_ELEMENT@9..13
0: L_ANGLE@9..10 "<" [] []
1: HTML_NAME@10..12
0: HTML_LITERAL@10..12 "br" [] []
2: HTML_ATTRIBUTE_LIST@12..12
3: (empty)
4: R_ANGLE@12..13 ">" [] []
2: HTML_CONTENT@13..18
0: HTML_LITERAL@13..18 "This" [] [Whitespace(" ")]
3: HTML_CONTENT@18..23
0: HTML_LITERAL@18..23 "text" [] [Whitespace(" ")]
4: HTML_CONTENT@23..26
0: HTML_LITERAL@23..26 "is" [] [Whitespace(" ")]
5: HTML_CONTENT@26..33
0: HTML_LITERAL@26..33 "inside" [] [Whitespace(" ")]
6: HTML_CONTENT@33..36
0: HTML_LITERAL@33..36 "br." [] []
2: HTML_BOGUS_ELEMENT@36..41
0: L_ANGLE@36..37 "<" [] []
1: SLASH@37..38 "/" [] []
2: HTML_NAME@38..40
0: HTML_LITERAL@38..40 "br" [] []
3: R_ANGLE@40..41 ">" [] []
3: HTML_ELEMENT_LIST@41..44
0: HTML_CONTENT@41..44
0: HTML_LITERAL@41..44 "bar" [] []
4: HTML_CLOSING_ELEMENT@44..51
0: L_ANGLE@44..45 "<" [] []
1: SLASH@45..46 "/" [] []
2: HTML_NAME@46..50
0: HTML_LITERAL@46..50 "span" [] []
3: R_ANGLE@50..51 ">" [] []
3: EOF@51..52 "" [Newline("\n")] []
```

## Diagnostics

```
br-with-end.html:1:39 parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
× Void elements should not have a closing tag.
> 1 │ <span>foo<br>This text is inside br.</br>bar</span>
│ ^^
2 │
br-with-end.html:1:37 parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
× Expected a matching closing tag but instead found '</br>'.
> 1 │ <span>foo<br>This text is inside br.</br>bar</span>
│ ^^^^^
2 │
i Expected a matching closing tag here.
> 1 │ <span>foo<br>This text is inside br.</br>bar</span>
│ ^^^^^
2 │
```
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<span>foo<br>bar</span>
Loading

0 comments on commit 39f7456

Please sign in to comment.