Skip to content

Commit

Permalink
perf(css_parser): avoid expensive check when parsing an id
Browse files Browse the repository at this point in the history
  • Loading branch information
Conaclos committed Sep 3, 2024
1 parent 73656ec commit 411a485
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 29 deletions.
23 changes: 13 additions & 10 deletions crates/biome_css_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ use biome_parser::lexer::{
};
use biome_rowan::SyntaxKind;
use biome_unicode_table::{
is_css_id_continue, is_css_id_start, lookup_byte, Dispatch, Dispatch::*,
is_css_non_ascii, lookup_byte,
Dispatch::{self, *},
};
use std::char::REPLACEMENT_CHARACTER;

Expand Down Expand Up @@ -319,7 +320,7 @@ impl<'src> CssLexer<'src> {

LSS => self.consume_lss(),

IDT if self.peek_byte() == Some(b'=') => {
IDT | DOL if self.peek_byte() == Some(b'=') => {
self.advance(1);
self.consume_byte(T!["$="])
}
Expand Down Expand Up @@ -461,7 +462,7 @@ impl<'src> CssLexer<'src> {
return match dispatch {
// TLD byte covers `url(~package/tilde.css)`;
// HAS byte covers `url(#IDofSVGpath);`
IDT | UNI | PRD | SLH | ZER | DIG | TLD | HAS => self.consume_url_raw_value(),
IDT | DOL | UNI | PRD | SLH | ZER | DIG | TLD | HAS => self.consume_url_raw_value(),
_ => self.consume_token(current),
};
}
Expand Down Expand Up @@ -990,16 +991,16 @@ impl<'src> CssLexer<'src> {
/// and `None` if it is not.
fn consume_ident_part(&mut self, current: u8) -> Option<char> {
let chr = match lookup_byte(current) {
MIN | DIG | ZER => {
IDT | MIN | DIG | ZER => {
self.advance(1);
// SAFETY: We know that the current byte is an ASCII letter, `_`, a hyphen or a digit.
current as char
}
// name code point
UNI | IDT => {
UNI => {
// SAFETY: We know that the current byte is a valid unicode code point
let chr = self.current_char_unchecked();
if is_css_id_continue(chr) {
if is_css_non_ascii(chr) {
self.advance(chr.len_utf8());
chr
} else {
Expand Down Expand Up @@ -1273,26 +1274,28 @@ impl<'src> CssLexer<'src> {
return false;
};
match lookup_byte(next) {
MIN | DIG | ZER => true,
IDT | MIN | DIG | ZER => true,
// If the third code point is a name-start code point
// return true.
UNI | IDT if is_css_id_continue(self.char_unchecked_at(2)) => true,
UNI => is_css_non_ascii(self.char_unchecked_at(2)),
// or the third and fourth code points are a valid escape
// return true.
BSL => self.is_valid_escape_at(3),
_ => false,
}
}
IDT => true,
// If the second code point is a name-start code point
// return true.
UNI | IDT if is_css_id_start(self.peek_char_unchecked()) => true,
UNI => is_css_non_ascii(self.peek_char_unchecked()),
// or the second and third code points are a valid escape
// return true.
BSL => self.is_valid_escape_at(2),
_ => false,
}
}
UNI | IDT if is_css_id_start(self.current_char_unchecked()) => true,
IDT => true,
UNI => is_css_non_ascii(self.current_char_unchecked()),
// U+005C REVERSE SOLIDUS (\)
// If the first and second code points are a valid escape, return true. Otherwise,
// return false.
Expand Down
6 changes: 3 additions & 3 deletions crates/biome_js_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ impl<'src> JsLexer<'src> {
let b = unsafe { self.current_unchecked() };

match lookup_byte(b) {
IDT | DIG | ZER => Some((b as char, false)),
IDT | DOL | DIG | ZER => Some((b as char, false)),
// FIXME: This should use ID_Continue, not XID_Continue
UNI => {
let chr = self.current_char_unchecked();
Expand Down Expand Up @@ -920,7 +920,7 @@ impl<'src> JsLexer<'src> {
false
}
}
IDT => true,
IDT | DOL => true,
_ => false,
}
}
Expand Down Expand Up @@ -1880,7 +1880,7 @@ impl<'src> JsLexer<'src> {
ERROR_TOKEN
}
}
IDT => self.resolve_identifier(byte as char),
IDT | DOL => self.resolve_identifier(byte as char),
DIG => {
self.read_number(false);
self.verify_number_end()
Expand Down
4 changes: 2 additions & 2 deletions crates/biome_json_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ impl<'src> Lexer<'src> {
match dispatched {
WHS => self.consume_newline_or_whitespaces(),
QOT => self.lex_string_literal(current),
IDT => self.lex_identifier(current),
IDT | DOL => self.lex_identifier(current),
COM => self.eat_byte(T![,]),
MIN | DIG | ZER => self.lex_number(current),
COL => self.eat_byte(T![:]),
Expand Down Expand Up @@ -689,7 +689,7 @@ impl<'src> Lexer<'src> {
while let Some(byte) = self.current_byte() {
self.current_char_unchecked();
match lookup_byte(byte) {
IDT | DIG | ZER => {
IDT | DOL | DIG | ZER => {
keyword = keyword.next_character(byte);
self.advance(1)
}
Expand Down
7 changes: 5 additions & 2 deletions crates/biome_unicode_table/src/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@ pub enum Dispatch {
/// Single `'` or Double quote `"`
QOT,

/// ASCII identifier, or `$`, `_`
/// ASCII letter or `_`
IDT,

/// Dollar sign `$`
DOL,

/// Hash `#`
HAS,

Expand Down Expand Up @@ -115,7 +118,7 @@ pub(crate) static DISPATCHER: [Dispatch; 256] = [
//0 1 2 3 4 5 6 7 8 9 A B C D E F //
ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, WHS, WHS, WHS, WHS, WHS, ERR, ERR, // 0
ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 1
WHS, EXL, QOT, HAS, IDT, PRC, AMP, QOT, PNO, PNC, MUL, PLS, COM, MIN, PRD, SLH, // 2
WHS, EXL, QOT, HAS, DOL, PRC, AMP, QOT, PNO, PNC, MUL, PLS, COM, MIN, PRD, SLH, // 2
ZER, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, COL, SEM, LSS, EQL, MOR, QST, // 3
AT_, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, // 4
IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, BTO, BSL, BTC, CRT, IDT, // 5
Expand Down
15 changes: 3 additions & 12 deletions crates/biome_unicode_table/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,12 @@ pub fn is_html_id_start(c: char) -> bool {
ID_Start(c)
}

/// Tests if `c` is a valid start of a CSS identifier
/// Tests if `c` is a non-ASCII code point that is allowed in a CSS identifier.
#[inline]
pub fn is_css_id_start(c: char) -> bool {
pub fn is_css_non_ascii(c: char) -> bool {
matches!(
c as u32,
0x41..=0x5a // A-Z
| 0x5f // `_`
| 0x61..=0x7a // a-z
| 0xB7
0xB7
| 0xc0..=0xd6
| 0xd8..=0xf6
| 0xf8..=0x37D
Expand All @@ -38,12 +35,6 @@ pub fn is_css_id_start(c: char) -> bool {
)
}

/// Tests if `c` is a valid continuation of a CSS identifier.
#[inline]
pub fn is_css_id_continue(c: char) -> bool {
matches!(c, '0'..='9' | '-') || is_css_id_start(c)
}

/// Tests if `c` is a valid start of a js identifier
#[inline]
pub fn is_js_id_start(c: char) -> bool {
Expand Down

0 comments on commit 411a485

Please sign in to comment.