Skip to content

Commit

Permalink
feat(parser/html): lex and parse unquoted attribute values
Browse files Browse the repository at this point in the history
  • Loading branch information
dyc3 committed Sep 16, 2024
1 parent 2a775c7 commit f991283
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 1 deletion.
27 changes: 27 additions & 0 deletions crates/biome_html_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,15 @@ impl<'src> HtmlLexer<'src> {
}
}

/// Lexes one token starting at `current`, choosing the sub-lexer from that byte.
///
/// - `\n`, `\r`, `\t` or a space is handed to `consume_newline_or_whitespaces`.
/// - A single or double quote is handed to `consume_string_literal`, which receives
///   the quote byte that was seen.
/// - Any other byte is handed to `consume_unquoted_string_literal`.
fn consume_token_attribute_value(&mut self, current: u8) -> HtmlSyntaxKind {
    // Whitespace and newlines are dealt with first.
    if matches!(current, b'\n' | b'\r' | b'\t' | b' ') {
        return self.consume_newline_or_whitespaces();
    }

    // A quote starts a quoted value; everything else is treated as an unquoted value.
    if current == b'\'' || current == b'"' {
        self.consume_string_literal(current)
    } else {
        self.consume_unquoted_string_literal()
    }
}

/// Bumps the current byte and creates a lexed token of the passed in kind.
#[inline]
fn consume_byte(&mut self, tok: HtmlSyntaxKind) -> HtmlSyntaxKind {
Expand Down Expand Up @@ -233,6 +242,23 @@ impl<'src> HtmlLexer<'src> {
}
}

/// Consume an attribute value that is not quoted.
///
/// The value ends at the first `\n`, `\r`, `\t`, space, `?`, `'`, `"`, `=`, `<`, `>` or
/// `` ` `` byte, or at the end of the input. Every other byte is part of the value,
/// including the bytes of multi-byte UTF-8 characters: the unquoted syntax is not
/// limited to ASCII, so `src=föo.png` must be lexed as a single value.
///
/// Always returns `HTML_STRING_LITERAL`. The token is empty when the very first byte
/// is already one of the terminators listed above.
///
/// See: https://html.spec.whatwg.org/#attributes-2 under "Unquoted attribute value syntax"
fn consume_unquoted_string_literal(&mut self) -> HtmlSyntaxKind {
    while let Some(current) = self.current_byte() {
        let ends_value = matches!(
            current,
            b'\n' | b'\r' | b'\t' | b' ' | b'?' | b'\'' | b'"' | b'=' | b'<' | b'>' | b'`'
        );
        if ends_value {
            break;
        }

        // Stepping one byte at a time is safe for multi-byte characters: every
        // terminator is ASCII and every byte of a multi-byte UTF-8 sequence is
        // >= 0x80, so the loop can only stop on a character boundary (or at EOF).
        self.advance(1);
    }

    HTML_STRING_LITERAL
}

fn consume_l_angle(&mut self) -> HtmlSyntaxKind {
self.assert_byte(b'<');

Expand Down Expand Up @@ -385,6 +411,7 @@ impl<'src> Lexer<'src> for HtmlLexer<'src> {
Some(current) => match context {
HtmlLexContext::Regular => self.consume_token(current),
HtmlLexContext::OutsideTag => self.consume_token_outside_tag(current),
HtmlLexContext::AttributeValue => self.consume_token_attribute_value(current),
},
None => EOF,
}
Expand Down
2 changes: 1 addition & 1 deletion crates/biome_html_parser/src/syntax/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ fn parse_attribute_initializer(p: &mut HtmlParser) -> ParsedSyntax {
return Absent;
}
let m = p.start();
p.bump(T![=]);
p.bump_with_context(T![=], HtmlLexContext::AttributeValue);
parse_string_literal(p).or_add_diagnostic(p, expected_initializer);
Present(m.complete(p, HTML_ATTRIBUTE_INITIALIZER_CLAUSE))
}
4 changes: 4 additions & 0 deletions crates/biome_html_parser/src/token_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ pub(crate) enum HtmlLexContext {
///
/// The exceptions being `<` which indicates the start of a tag, and `>` which is invalid syntax if not preceded with a `<`.
OutsideTag,
/// When the parser encounters a `=` token (the beginning of the attribute initializer clause), it switches to this context.
///
/// This is because attribute values can start and end with a `"` or `'` character, or be unquoted, and the lexer needs to know to start lexing a string literal.
AttributeValue,
}

impl LexContext for HtmlLexContext {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<img src=foo.png />
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
---
source: crates/biome_html_parser/tests/spec_test.rs
expression: snapshot
---
## Input

```html
<img src=foo.png />
```


## AST

```
HtmlRoot {
bom_token: missing (optional),
directive: missing (optional),
html: HtmlSelfClosingElement {
l_angle_token: L_ANGLE@0..1 "<" [] [],
name: HtmlName {
value_token: HTML_LITERAL@1..5 "img" [] [Whitespace(" ")],
},
attributes: HtmlAttributeList [
HtmlAttribute {
name: HtmlName {
value_token: HTML_LITERAL@5..8 "src" [] [],
},
initializer: HtmlAttributeInitializerClause {
eq_token: EQ@8..9 "=" [] [],
value: HtmlString {
value_token: HTML_STRING_LITERAL@9..17 "foo.png" [] [Whitespace(" ")],
},
},
},
],
slash_token: SLASH@17..18 "/" [] [],
r_angle_token: R_ANGLE@18..19 ">" [] [],
},
eof_token: EOF@19..20 "" [Newline("\n")] [],
}
```

## CST

```
0: HTML_ROOT@0..20
0: (empty)
1: (empty)
2: HTML_SELF_CLOSING_ELEMENT@0..19
0: L_ANGLE@0..1 "<" [] []
1: HTML_NAME@1..5
0: HTML_LITERAL@1..5 "img" [] [Whitespace(" ")]
2: HTML_ATTRIBUTE_LIST@5..17
0: HTML_ATTRIBUTE@5..17
0: HTML_NAME@5..8
0: HTML_LITERAL@5..8 "src" [] []
1: HTML_ATTRIBUTE_INITIALIZER_CLAUSE@8..17
0: EQ@8..9 "=" [] []
1: HTML_STRING@9..17
0: HTML_STRING_LITERAL@9..17 "foo.png" [] [Whitespace(" ")]
3: SLASH@17..18 "/" [] []
4: R_ANGLE@18..19 ">" [] []
3: EOF@19..20 "" [Newline("\n")] []
```

0 comments on commit f991283

Please sign in to comment.