diff --git a/.changeset/fix-html-parser-digit-tag-start.md b/.changeset/fix-html-parser-digit-tag-start.md new file mode 100644 index 000000000000..7c97f6df2819 --- /dev/null +++ b/.changeset/fix-html-parser-digit-tag-start.md @@ -0,0 +1,5 @@ +--- +"@biomejs/biome": patch +--- + +Fixed [#8363](https://github.com/biomejs/biome/issues/8363): HTML parser no longer crashes when encountering a `<` character followed by a digit in text content (e.g., `<12 months`). The parser now correctly emits an "Unescaped `<` bracket character" error instead of treating `<12` as a tag name and crashing. diff --git a/crates/biome_html_parser/src/lexer/mod.rs b/crates/biome_html_parser/src/lexer/mod.rs index 8c6dca5fd6e3..3f108b4e883c 100644 --- a/crates/biome_html_parser/src/lexer/mod.rs +++ b/crates/biome_html_parser/src/lexer/mod.rs @@ -219,7 +219,7 @@ impl<'src> HtmlLexer<'src> { // https://html.spec.whatwg.org/multipage/syntax.html#start-tags if self .peek_byte() - .is_some_and(|b| is_tag_name_byte(b) || b == b'!' || b == b'/' || b == b'>') + .is_some_and(|b| is_tag_start_byte(b) || b == b'!' || b == b'/' || b == b'>') { self.consume_l_angle() } else { @@ -1238,6 +1238,12 @@ fn is_tag_name_byte(byte: u8) -> bool { byte.is_ascii_alphanumeric() || byte == b'-' || byte == b':' || byte == b'.' } +fn is_tag_start_byte(byte: u8) -> bool { + // Tag names must start with an ASCII letter (not a digit) + // https://html.spec.whatwg.org/#valid-custom-element-name + byte.is_ascii_alphabetic() +} + fn is_attribute_name_byte(byte: u8) -> bool { // https://html.spec.whatwg.org/#attributes-2 byte.is_ascii() diff --git a/crates/biome_html_parser/tests/html_specs/error/element/tag-name-starts-with-digit.html b/crates/biome_html_parser/tests/html_specs/error/element/tag-name-starts-with-digit.html new file mode 100644 index 000000000000..9fdb041715bf --- /dev/null +++ b/crates/biome_html_parser/tests/html_specs/error/element/tag-name-starts-with-digit.html @@ -0,0 +1 @@ +
<div>Target <12 months</div>
diff --git a/crates/biome_html_parser/tests/html_specs/error/element/tag-name-starts-with-digit.html.snap b/crates/biome_html_parser/tests/html_specs/error/element/tag-name-starts-with-digit.html.snap new file mode 100644 index 000000000000..24b93ce85cce --- /dev/null +++ b/crates/biome_html_parser/tests/html_specs/error/element/tag-name-starts-with-digit.html.snap @@ -0,0 +1,100 @@ +--- +source: crates/biome_html_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +```html +
<div>Target <12 months</div>
+ +``` + + +## AST + +``` +HtmlRoot { + bom_token: missing (optional), + frontmatter: missing (optional), + directive: missing (optional), + html: HtmlElementList [ + HtmlElement { + opening_element: HtmlOpeningElement { + l_angle_token: L_ANGLE@0..1 "<" [] [], + name: HtmlTagName { + value_token: HTML_LITERAL@1..4 "div" [] [], + }, + attributes: HtmlAttributeList [], + r_angle_token: R_ANGLE@4..5 ">" [] [], + }, + children: HtmlElementList [ + HtmlContent { + value_token: HTML_LITERAL@5..12 "Target" [] [Whitespace(" ")], + }, + HtmlContent { + value_token: HTML_LITERAL@12..13 "<" [] [], + }, + HtmlContent { + value_token: HTML_LITERAL@13..22 "12 months" [] [], + }, + ], + closing_element: HtmlClosingElement { + l_angle_token: L_ANGLE@22..23 "<" [] [], + slash_token: SLASH@23..24 "/" [] [], + name: HtmlTagName { + value_token: HTML_LITERAL@24..27 "div" [] [], + }, + r_angle_token: R_ANGLE@27..28 ">" [] [], + }, + }, + ], + eof_token: EOF@28..29 "" [Newline("\n")] [], +} +``` + +## CST + +``` +0: HTML_ROOT@0..29 + 0: (empty) + 1: (empty) + 2: (empty) + 3: HTML_ELEMENT_LIST@0..28 + 0: HTML_ELEMENT@0..28 + 0: HTML_OPENING_ELEMENT@0..5 + 0: L_ANGLE@0..1 "<" [] [] + 1: HTML_TAG_NAME@1..4 + 0: HTML_LITERAL@1..4 "div" [] [] + 2: HTML_ATTRIBUTE_LIST@4..4 + 3: R_ANGLE@4..5 ">" [] [] + 1: HTML_ELEMENT_LIST@5..22 + 0: HTML_CONTENT@5..12 + 0: HTML_LITERAL@5..12 "Target" [] [Whitespace(" ")] + 1: HTML_CONTENT@12..13 + 0: HTML_LITERAL@12..13 "<" [] [] + 2: HTML_CONTENT@13..22 + 0: HTML_LITERAL@13..22 "12 months" [] [] + 2: HTML_CLOSING_ELEMENT@22..28 + 0: L_ANGLE@22..23 "<" [] [] + 1: SLASH@23..24 "/" [] [] + 2: HTML_TAG_NAME@24..27 + 0: HTML_LITERAL@24..27 "div" [] [] + 3: R_ANGLE@27..28 ">" [] [] + 4: EOF@28..29 "" [Newline("\n")] [] + +``` + +## Diagnostics + +``` +tag-name-starts-with-digit.html:1:13 parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + × Unescaped `<` bracket character. Expected a tag or escaped character. + + > 1 │
<div>Target <12 months</div>
+ │ ^ + 2 │ + + i Replace this character with `&lt;` to escape it. + +```