perf(css_parser): avoid expensive check when parsing an id

biomejs · Sep 3, 2024 · 411a485 · 411a485
1 parent 73656ec
commit 411a485
Show file tree

Hide file tree

Showing 5 changed files with 26 additions and 29 deletions.
diff --git a/crates/biome_css_parser/src/lexer/mod.rs b/crates/biome_css_parser/src/lexer/mod.rs
@@ -10,7 +10,8 @@ use biome_parser::lexer::{
 };
 use biome_rowan::SyntaxKind;
 use biome_unicode_table::{
-    is_css_id_continue, is_css_id_start, lookup_byte, Dispatch, Dispatch::*,
+    is_css_non_ascii, lookup_byte,
+    Dispatch::{self, *},
 };
 use std::char::REPLACEMENT_CHARACTER;
 
@@ -319,7 +320,7 @@ impl<'src> CssLexer<'src> {
 
             LSS => self.consume_lss(),
 
-            IDT if self.peek_byte() == Some(b'=') => {
+            IDT | DOL if self.peek_byte() == Some(b'=') => {
                 self.advance(1);
                 self.consume_byte(T!["$="])
             }
@@ -461,7 +462,7 @@ impl<'src> CssLexer<'src> {
             return match dispatch {
                 // TLD byte covers `url(~package/tilde.css)`;
                 // HAS byte covers `url(#IDofSVGpath);`
-                IDT | UNI | PRD | SLH | ZER | DIG | TLD | HAS => self.consume_url_raw_value(),
+                IDT | DOL | UNI | PRD | SLH | ZER | DIG | TLD | HAS => self.consume_url_raw_value(),
                 _ => self.consume_token(current),
             };
         }
@@ -990,16 +991,16 @@ impl<'src> CssLexer<'src> {
     /// and `None` if it is not.
     fn consume_ident_part(&mut self, current: u8) -> Option<char> {
         let chr = match lookup_byte(current) {
-            MIN | DIG | ZER => {
+            IDT | MIN | DIG | ZER => {
                 self.advance(1);
                 // SAFETY: We know that the current byte is a hyphen or a number.
                 current as char
             }
             // name code point
-            UNI | IDT => {
+            UNI => {
                 // SAFETY: We know that the current byte is a valid unicode code point
                 let chr = self.current_char_unchecked();
-                if is_css_id_continue(chr) {
+                if is_css_non_ascii(chr) {
                     self.advance(chr.len_utf8());
                     chr
                 } else {
@@ -1273,26 +1274,28 @@ impl<'src> CssLexer<'src> {
                             return false;
                         };
                         match lookup_byte(next) {
-                            MIN | DIG | ZER => true,
+                            IDT | MIN | DIG | ZER => true,
                             // If the third code point is a name-start code point
                             // return true.
-                            UNI | IDT if is_css_id_continue(self.char_unchecked_at(2)) => true,
+                            UNI => is_css_non_ascii(self.char_unchecked_at(2)),
                             // or the third and fourth code points are a valid escape
                             // return true.
                             BSL => self.is_valid_escape_at(3),
                             _ => false,
                         }
                     }
+                    IDT => true,
                     // If the second code point is a name-start code point
                     // return true.
-                    UNI | IDT if is_css_id_start(self.peek_char_unchecked()) => true,
+                    UNI => is_css_non_ascii(self.peek_char_unchecked()),
                     // or the second and third code points are a valid escape
                     // return true.
                     BSL => self.is_valid_escape_at(2),
                     _ => false,
                 }
             }
-            UNI | IDT if is_css_id_start(self.current_char_unchecked()) => true,
+            IDT => true,
+            UNI => is_css_non_ascii(self.current_char_unchecked()),
             // U+005C REVERSE SOLIDUS (\)
             // If the first and second code points are a valid escape, return true. Otherwise,
             // return false.

diff --git a/crates/biome_js_parser/src/lexer/mod.rs b/crates/biome_js_parser/src/lexer/mod.rs
@@ -852,7 +852,7 @@ impl<'src> JsLexer<'src> {
         let b = unsafe { self.current_unchecked() };
 
         match lookup_byte(b) {
-            IDT | DIG | ZER => Some((b as char, false)),
+            IDT | DOL | DIG | ZER => Some((b as char, false)),
             // FIXME: This should use ID_Continue, not XID_Continue
             UNI => {
                 let chr = self.current_char_unchecked();
@@ -920,7 +920,7 @@ impl<'src> JsLexer<'src> {
                     false
                 }
             }
-            IDT => true,
+            IDT | DOL => true,
             _ => false,
         }
     }
@@ -1880,7 +1880,7 @@ impl<'src> JsLexer<'src> {
                     ERROR_TOKEN
                 }
             }
-            IDT => self.resolve_identifier(byte as char),
+            IDT | DOL => self.resolve_identifier(byte as char),
             DIG => {
                 self.read_number(false);
                 self.verify_number_end()

diff --git a/crates/biome_json_parser/src/lexer/mod.rs b/crates/biome_json_parser/src/lexer/mod.rs
@@ -307,7 +307,7 @@ impl<'src> Lexer<'src> {
         match dispatched {
             WHS => self.consume_newline_or_whitespaces(),
             QOT => self.lex_string_literal(current),
-            IDT => self.lex_identifier(current),
+            IDT | DOL => self.lex_identifier(current),
             COM => self.eat_byte(T![,]),
             MIN | DIG | ZER => self.lex_number(current),
             COL => self.eat_byte(T![:]),
@@ -689,7 +689,7 @@ impl<'src> Lexer<'src> {
         while let Some(byte) = self.current_byte() {
             self.current_char_unchecked();
             match lookup_byte(byte) {
-                IDT | DIG | ZER => {
+                IDT | DOL | DIG | ZER => {
                     keyword = keyword.next_character(byte);
                     self.advance(1)
                 }

diff --git a/crates/biome_unicode_table/src/bytes.rs b/crates/biome_unicode_table/src/bytes.rs
@@ -16,9 +16,12 @@ pub enum Dispatch {
     /// Single `'` or Double quote `"`
     QOT,
 
-    /// ASCII identifier, or `$`, `_`
+    /// ASCII letter or `_`
     IDT,
 
+    /// Dollar sign `$`
+    DOL,
+
     /// Hash `#`
     HAS,
 
@@ -115,7 +118,7 @@ pub(crate) static DISPATCHER: [Dispatch; 256] = [
     //0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F   //
     ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, WHS, WHS, WHS, WHS, WHS, ERR, ERR, // 0
     ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 1
-    WHS, EXL, QOT, HAS, IDT, PRC, AMP, QOT, PNO, PNC, MUL, PLS, COM, MIN, PRD, SLH, // 2
+    WHS, EXL, QOT, HAS, DOL, PRC, AMP, QOT, PNO, PNC, MUL, PLS, COM, MIN, PRD, SLH, // 2
     ZER, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, COL, SEM, LSS, EQL, MOR, QST, // 3
     AT_, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, // 4
     IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, BTO, BSL, BTC, CRT, IDT, // 5

diff --git a/crates/biome_unicode_table/src/lib.rs b/crates/biome_unicode_table/src/lib.rs
@@ -12,15 +12,12 @@ pub fn is_html_id_start(c: char) -> bool {
     ID_Start(c)
 }
 
-/// Tests if `c` is a valid start of a CSS identifier
+/// Tests if `c` is a non-ASCII code point that is allowed in a CSS identifier.
 #[inline]
-pub fn is_css_id_start(c: char) -> bool {
+pub fn is_css_non_ascii(c: char) -> bool {
     matches!(
         c as u32,
-        0x41..=0x5a // A-Z
-        | 0x5f // `_`
-        | 0x61..=0x7a // a-z
-        | 0xB7
+        0xB7
         | 0xc0..=0xd6
         | 0xd8..=0xf6
         | 0xf8..=0x37D
@@ -38,12 +35,6 @@ pub fn is_css_id_start(c: char) -> bool {
     )
 }
 
-/// Tests if `c` is a valid continuation of a CSS identifier.
-#[inline]
-pub fn is_css_id_continue(c: char) -> bool {
-    matches!(c, '0'..='9' | '-') || is_css_id_start(c)
-}
-
 /// Tests if `c` is a valid start of a js identifier
 #[inline]
 pub fn is_js_id_start(c: char) -> bool {