diff --git a/.changeset/fix-html-bom-handling.md b/.changeset/fix-html-bom-handling.md
new file mode 100644
index 000000000000..1f4f2cb105bc
--- /dev/null
+++ b/.changeset/fix-html-bom-handling.md
@@ -0,0 +1,5 @@
+---
+"@biomejs/biome": patch
+---
+
+Fixed [#7919](https://github.com/biomejs/biome/issues/7919): The HTML parser now correctly handles a Unicode BOM (Byte Order Mark) at the beginning of an HTML file. The BOM is lexed as its own token, and the markup that follows it is parsed normally.
diff --git a/crates/biome_html_parser/src/lexer/mod.rs b/crates/biome_html_parser/src/lexer/mod.rs
index 1a290af641fa..a346afb2e169 100644
--- a/crates/biome_html_parser/src/lexer/mod.rs
+++ b/crates/biome_html_parser/src/lexer/mod.rs
@@ -82,15 +82,7 @@ impl<'src> HtmlLexer<'src> {
_ if self.current_kind != T![<] && is_attribute_name_byte(current) => {
self.consume_identifier(current, false)
}
- _ => {
- if self.position == 0
- && let Some((bom, bom_size)) = self.consume_potential_bom(UNICODE_BOM)
- {
- self.unicode_bom_length = bom_size;
- return bom;
- }
- self.consume_unexpected_character()
- }
+ _ => self.consume_unexpected_character(),
}
}
@@ -134,7 +126,15 @@ impl<'src> HtmlLexer<'src> {
self.consume_byte(HTML_LITERAL)
}
}
- _ => self.consume_html_text(current),
+ _ => {
+ if self.position == 0
+ && let Some((bom, bom_size)) = self.consume_potential_bom(UNICODE_BOM)
+ {
+ self.unicode_bom_length = bom_size;
+ return bom;
+ }
+ self.consume_html_text(current)
+ }
}
}
diff --git a/crates/biome_html_parser/tests/html_specs/ok/bom.html b/crates/biome_html_parser/tests/html_specs/ok/bom.html
new file mode 100644
index 000000000000..8ab3751e09f0
--- /dev/null
+++ b/crates/biome_html_parser/tests/html_specs/ok/bom.html
@@ -0,0 +1 @@
+
diff --git a/crates/biome_html_parser/tests/html_specs/ok/bom.html.snap b/crates/biome_html_parser/tests/html_specs/ok/bom.html.snap
new file mode 100644
index 000000000000..92a48c087871
--- /dev/null
+++ b/crates/biome_html_parser/tests/html_specs/ok/bom.html.snap
@@ -0,0 +1,53 @@
+---
+source: crates/biome_html_parser/tests/spec_test.rs
+assertion_line: 138
+expression: snapshot
+---
+## Input
+
+```html
+
+
+```
+
+
+## AST
+
+```
+HtmlRoot {
+ bom_token: UNICODE_BOM@0..3 "\u{feff}" [] [],
+ frontmatter: missing (optional),
+ directive: HtmlDirective {
+ l_angle_token: L_ANGLE@3..4 "<" [] [],
+ excl_token: BANG@4..5 "!" [] [],
+ doctype_token: DOCTYPE_KW@5..12 "doctype" [] [],
+ html_token: missing (optional),
+ quirk_token: missing (optional),
+ public_id_token: missing (optional),
+ system_id_token: missing (optional),
+ r_angle_token: R_ANGLE@12..13 ">" [] [],
+ },
+ html: HtmlElementList [],
+ eof_token: EOF@13..14 "" [Newline("\n")] [],
+}
+```
+
+## CST
+
+```
+0: HTML_ROOT@0..14
+ 0: UNICODE_BOM@0..3 "\u{feff}" [] []
+ 1: (empty)
+ 2: HTML_DIRECTIVE@3..13
+ 0: L_ANGLE@3..4 "<" [] []
+ 1: BANG@4..5 "!" [] []
+ 2: DOCTYPE_KW@5..12 "doctype" [] []
+ 3: (empty)
+ 4: (empty)
+ 5: (empty)
+ 6: (empty)
+ 7: R_ANGLE@12..13 ">" [] []
+ 3: HTML_ELEMENT_LIST@13..13
+ 4: EOF@13..14 "" [Newline("\n")] []
+
+```