Skip to content

Commit e41c0ac

Browse files
authored
perf(es/parser): Eliminate the outer loop of skip_block_comment (#11261)
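**Description:** The code that ran after each `byte_search!` call (its continuation, i.e. the `match` on the matched byte) can be merged into the macro's `continue_if` block, which removes the need for the outer `loop` in `skip_block_comment`.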
1 parent 2cea7dd commit e41c0ac

File tree

2 files changed

+66
-93
lines changed

2 files changed

+66
-93
lines changed

.changeset/eight-seas-decide.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
swc_ecma_parser: patch
3+
swc_core: patch
4+
---
5+
6+
perf(es/parser): eliminate the outer loop of `skip_block_comment`

crates/swc_ecma_parser/src/lexer/mod.rs

Lines changed: 60 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -754,121 +754,88 @@ impl<'a> Lexer<'a> {
754754
let slice_start = self.cur_pos();
755755

756756
let had_line_break_before_last = self.had_line_break_before_last();
757-
let mut should_mark_had_line_break = false;
758757

759-
loop {
760-
let matched_byte = byte_search! {
761-
lexer: self,
762-
table: BLOCK_COMMENT_SCAN_TABLE,
763-
continue_if: (matched_byte, pos_offset) {
764-
if matched_byte == LS_OR_PS_FIRST {
758+
byte_search! {
759+
lexer: self,
760+
table: BLOCK_COMMENT_SCAN_TABLE,
761+
continue_if: (matched_byte, pos_offset) {
762+
match matched_byte {
763+
LS_OR_PS_FIRST => {
765764
// 0xE2 - could be LS/PS or some other Unicode character
766765
let current_slice = self.input().as_str();
767766
let byte_pos = pos_offset;
768767
if byte_pos + 2 < current_slice.len() {
769768
let bytes = current_slice.as_bytes();
770769
let next2 = [bytes[byte_pos + 1], bytes[byte_pos + 2]];
771770
if next2 == LS_BYTES_2_AND_3 || next2 == PS_BYTES_2_AND_3 {
772-
// It's a real line terminator - don't continue
773-
false
774-
} else {
775-
// Some other Unicode character starting with 0xE2
776-
true
771+
self.state_mut().mark_had_line_break();
772+
self.input_mut().bump_bytes(2);
777773
}
778-
} else {
779-
// Not enough bytes for full LS/PS sequence
780-
true
781774
}
782-
} else {
783-
// '*', '\r', or '\n' - don't continue
784-
false
785-
}
786-
},
787-
handle_eof: {
788-
if should_mark_had_line_break {
789-
self.state_mut().mark_had_line_break();
775+
true
790776
}
791-
let end_pos = self.input().end_pos();
792-
let span = Span::new_with_checked(end_pos, end_pos);
793-
self.emit_error_span(span, SyntaxError::UnterminatedBlockComment);
794-
return;
795-
}
796-
};
797-
798-
match matched_byte {
799-
b'*' => {
800-
if self.peek() == Some('/') {
801-
// Consume "*/"
802-
self.input_mut().bump_bytes(2);
777+
b'*' => {
778+
let bytes = self.input().as_str().as_bytes();
779+
if bytes.get(pos_offset + 1) == Some(&b'/') {
780+
// Consume "*/"
781+
self.input_mut().bump_bytes(pos_offset + 2);
803782

804-
if should_mark_had_line_break {
805-
self.state_mut().mark_had_line_break();
806-
}
783+
let end = self.cur_pos();
807784

808-
let end = self.cur_pos();
785+
// Decide trailing / leading
786+
let mut is_for_next =
787+
had_line_break_before_last || !self.state().can_have_trailing_comment();
809788

810-
// Decide trailing / leading
811-
let mut is_for_next =
812-
had_line_break_before_last || !self.state().can_have_trailing_comment();
813-
814-
// If next char is ';' without newline, treat as trailing
815-
if !had_line_break_before_last && self.input().is_byte(b';') {
816-
is_for_next = false;
817-
}
789+
// If next char is ';' without newline, treat as trailing
790+
if !had_line_break_before_last && self.input().is_byte(b';') {
791+
is_for_next = false;
792+
}
818793

819-
if self.comments_buffer().is_some() {
820-
let src = unsafe {
821-
// Safety: We got slice_start and end from self.input so those are
822-
// valid.
823-
self.input_mut().slice(slice_start, end)
824-
};
825-
let s = &src[..src.len() - 2];
826-
let cmt = Comment {
827-
kind: CommentKind::Block,
828-
span: Span::new_with_checked(start, end),
829-
text: self.atom(s),
830-
};
794+
if self.comments_buffer().is_some() {
795+
let src = unsafe {
796+
// Safety: We got slice_start and end from self.input so those are
797+
// valid.
798+
self.input_mut().slice(slice_start, end)
799+
};
800+
let s = &src[..src.len() - 2];
801+
let cmt = Comment {
802+
kind: CommentKind::Block,
803+
span: Span::new_with_checked(start, end),
804+
text: self.atom(s),
805+
};
831806

832-
if is_for_next {
833-
self.comments_buffer_mut().unwrap().push_pending(cmt);
834-
} else {
835-
let pos = self.state().prev_hi();
836-
self.comments_buffer_mut()
837-
.unwrap()
838-
.push_comment(BufferedComment {
839-
kind: BufferedCommentKind::Trailing,
840-
pos,
841-
comment: cmt,
842-
});
807+
if is_for_next {
808+
self.comments_buffer_mut().unwrap().push_pending(cmt);
809+
} else {
810+
let pos = self.state().prev_hi();
811+
self.comments_buffer_mut()
812+
.unwrap()
813+
.push_comment(BufferedComment {
814+
kind: BufferedCommentKind::Trailing,
815+
pos,
816+
comment: cmt,
817+
});
818+
}
843819
}
820+
821+
return;
844822
}
845823

846-
return;
847-
} else {
848-
// Just a lone '*', consume it and continue.
849-
self.bump();
850-
}
851-
}
852-
b'\n' => {
853-
should_mark_had_line_break = true;
854-
self.bump();
855-
}
856-
b'\r' => {
857-
should_mark_had_line_break = true;
858-
self.bump();
859-
if self.peek() == Some('\n') {
860-
self.bump();
861-
}
862-
}
863-
_ => {
864-
// Unicode line terminator (LS/PS) or other character
865-
if let Some('\u{2028}' | '\u{2029}') = self.cur() {
866-
should_mark_had_line_break = true;
824+
true
867825
}
868-
self.bump();
826+
_ => {
827+
self.state_mut().mark_had_line_break();
828+
true
829+
},
869830
}
831+
},
832+
handle_eof: {
833+
let end_pos = self.input().end_pos();
834+
let span = Span::new_with_checked(end_pos, end_pos);
835+
self.emit_error_span(span, SyntaxError::UnterminatedBlockComment);
836+
return;
870837
}
871-
}
838+
};
872839
}
873840

874841
/// Ensure that ident cannot directly follow numbers.

0 commit comments

Comments
 (0)