Skip to content

Commit

Permalink
feat(biome_graphql_parser): implement NthAt for GraphqlLexer (#2677)
Browse files Browse the repository at this point in the history
  • Loading branch information
vohoanglong0107 committed May 2, 2024
1 parent afa5004 commit 1f891f5
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 81 deletions.
28 changes: 25 additions & 3 deletions crates/biome_graphql_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod tests;

use biome_graphql_syntax::{GraphqlSyntaxKind, GraphqlSyntaxKind::*, TextLen, TextSize, T};
use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::{Lexer, LexerCheckpoint, TokenFlags};
use biome_parser::lexer::{Lexer, LexerCheckpoint, LexerWithCheckpoint, TokenFlags};
use std::ops::Add;

#[derive(Debug)]
Expand All @@ -15,6 +15,9 @@ pub struct GraphqlLexer<'src> {
/// The start byte position in the source text of the next token.
position: usize,

/// If the source starts with a Unicode BOM, this is the number of bytes for that token.
unicode_bom_length: usize,

/// Byte offset of the current token from the start of the source
/// The range of the current token can be computed by
/// `self.position - self.current_start`.
Expand Down Expand Up @@ -116,6 +119,20 @@ impl<'src> Lexer<'src> for GraphqlLexer<'src> {
}
}

impl<'src> LexerWithCheckpoint<'src> for GraphqlLexer<'src> {
    /// Snapshots the lexer's state so a caller can rewind after speculative lexing.
    ///
    /// Captures the byte position, the current token's start/kind/flags, whether
    /// a line break precedes the current token, the BOM length, and how many
    /// diagnostics have been emitted so far (rewinding truncates back to it).
    fn checkpoint(&self) -> LexerCheckpoint<Self::Kind> {
        // Number of diagnostics recorded up to this point.
        let diagnostics_pos = self.diagnostics.len() as u32;

        LexerCheckpoint {
            after_line_break: self.has_preceding_line_break(),
            current_kind: self.current_kind,
            current_flags: self.current_flags,
            current_start: self.current_start,
            // NOTE(review): `position` is a byte offset; assumes sources never
            // exceed u32::MAX bytes (TextSize is u32-backed) — confirm upstream.
            position: TextSize::from(self.position as u32),
            unicode_bom_length: self.unicode_bom_length,
            diagnostics_pos,
        }
    }
}

impl<'src> GraphqlLexer<'src> {
/// Make a new lexer from a str, this is safe because strs are valid utf8
pub fn from_str(source: &'src str) -> Self {
Expand All @@ -126,6 +143,7 @@ impl<'src> GraphqlLexer<'src> {
current_flags: TokenFlags::empty(),
position: 0,
diagnostics: vec![],
unicode_bom_length: 0,
}
}

Expand Down Expand Up @@ -161,8 +179,12 @@ impl<'src> GraphqlLexer<'src> {
b'#' => self.consume_comment(),
_ if is_name_start(current) => self.consume_name(current),
_ if is_number_start(current) => self.consume_number(current),
_ if self.position == 0 && self.consume_potential_bom(UNICODE_BOM).is_some() => {
UNICODE_BOM
_ if self.position == 0 => {
if let Some((bom, bom_size)) = self.consume_potential_bom(UNICODE_BOM) {
self.unicode_bom_length = bom_size;
return bom;
}
self.consume_unexpected_character()
}
_ => self.consume_unexpected_character(),
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ fn parse_field(p: &mut GraphqlParser) -> ParsedSyntax {

// alias is optional, so if there is a colon, we parse it as an alias
// otherwise we parse it as a normal field name
if p.lookahead_at(T![:]) {
if p.nth_at(1, T![:]) {
let m = p.start();

// name is checked for in `is_at_field`
Expand Down
6 changes: 3 additions & 3 deletions crates/biome_graphql_parser/src/parser/definitions/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ fn parse_root_operation_type_definition(p: &mut GraphqlParser) -> ParsedSyntax {

#[inline]
pub(crate) fn is_at_schema_definition(p: &mut GraphqlParser<'_>) -> bool {
p.at(T![schema]) || (is_at_string(p) && p.lookahead_at(T![schema]))
p.at(T![schema]) || (is_at_string(p) && p.nth_at(1, T![schema]))
}

#[inline]
Expand All @@ -117,7 +117,7 @@ fn is_at_root_operation_type_definition(p: &mut GraphqlParser<'_>) -> bool {
// missing operation type
|| p.at(T![:])
// there is likely a typo in the operation type
|| p.lookahead_at(T![:])
|| p.nth_at(1, T![:])
}

/// To prevent a missing closing brace from causing the parser to include the next definition
Expand All @@ -136,5 +136,5 @@ fn is_at_root_operation_type_definition_end(p: &mut GraphqlParser<'_>) -> bool {
p.at(T!['}'])
|| (!p.at_ts(OPERATION_TYPE) && is_at_definition(p))
// start of a new operation definition
|| (p.at_ts(OPERATION_TYPE) && !p.lookahead_at(T![:]))
|| (p.at_ts(OPERATION_TYPE) && !p.nth_at(1, T![:]))
}
8 changes: 0 additions & 8 deletions crates/biome_graphql_parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,6 @@ impl<'source> GraphqlParser<'source> {
}
}

/// Returns the kind of the next non-trivia token without advancing the parser,
/// by delegating to the underlying token source.
pub fn lookahead(&mut self) -> GraphqlSyntaxKind {
self.source.lookahead()
}

/// Returns `true` if the next non-trivia token is of the given `kind`,
/// without advancing the parser; delegates to the underlying token source.
pub fn lookahead_at(&mut self, kind: GraphqlSyntaxKind) -> bool {
self.source.lookahead_at(kind)
}

pub fn finish(
self,
) -> (
Expand Down
2 changes: 1 addition & 1 deletion crates/biome_graphql_parser/src/parser/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ fn is_at_list(p: &GraphqlParser) -> bool {
fn is_at_list_end(p: &mut GraphqlParser) -> bool {
p.at(T![']'])
// at next argument
|| p.lookahead() == T![:]
|| p.nth_at(1, T![:])
// value is only used in argument
|| is_at_argument_list_end(p)
}
Expand Down
168 changes: 103 additions & 65 deletions crates/biome_graphql_parser/src/token_source.rs
Original file line number Diff line number Diff line change
@@ -1,150 +1,188 @@
use std::collections::VecDeque;

use crate::lexer::GraphqlLexer;
use biome_graphql_syntax::GraphqlSyntaxKind::{EOF, TOMBSTONE};
use biome_graphql_syntax::GraphqlSyntaxKind::EOF;
use biome_graphql_syntax::{GraphqlSyntaxKind, TextRange};
use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::Lexer;
use biome_parser::lexer::BufferedLexer;
use biome_parser::prelude::TokenSource;
use biome_parser::token_source::Trivia;
use biome_parser::token_source::{NthToken, Trivia};
use biome_rowan::TriviaPieceKind;

pub(crate) struct GraphqlTokenSource<'source> {
lexer: GraphqlLexer<'source>,
trivia: Vec<Trivia>,
current: NonTriviaToken,
next: Option<NonTriviaToken>,
lexer: BufferedLexer<'source, GraphqlLexer<'source>>,
trivia_list: Vec<Trivia>,

/// Cache for the non-trivia token lookahead. For example for the source `let a = 10;` if the
/// [TokenSource]'s currently positioned at the start of the file (`let`). The `nth(2)` non-trivia token,
/// as returned by the [TokenSource], is the `=` token but retrieving it requires skipping over the
/// two whitespace trivia tokens (first between `let` and `a`, second between `a` and `=`).
/// The [TokenSource] state then is:
///
/// * `non_trivia_lookahead`: [IDENT: 'a', EQ]
/// * `lookahead_offset`: 4 (the `=` is the 4th token after the `let` keyword)
non_trivia_lookahead: VecDeque<Lookahead>,

/// Offset of the last cached lookahead token from the current [BufferedLexer] token.
lookahead_offset: usize,
}

struct NonTriviaToken {
#[derive(Debug, Copy, Clone)]
struct Lookahead {
kind: GraphqlSyntaxKind,
range: TextRange,
preceding_line_break: bool,
after_newline: bool,
}

impl Default for NonTriviaToken {
fn default() -> Self {
impl<'source> GraphqlTokenSource<'source> {
pub(crate) fn new(lexer: BufferedLexer<'source, GraphqlLexer<'source>>) -> Self {
Self {
kind: TOMBSTONE,
range: TextRange::default(),
preceding_line_break: false,
lexer,
trivia_list: Vec::new(),
non_trivia_lookahead: VecDeque::new(),
lookahead_offset: 0,
}
}
}

impl<'source> GraphqlTokenSource<'source> {
pub fn from_str(source: &'source str) -> Self {
let lexer = GraphqlLexer::from_str(source);
let lexer = BufferedLexer::new(lexer);

let mut source = Self {
lexer,
trivia: Vec::new(),
current: NonTriviaToken::default(),
next: None,
};
let mut source = GraphqlTokenSource::new(lexer);

source.advance_to_next_non_trivia_token(true);
source.next_non_trivia_token(true);
source
}

fn advance_to_next_non_trivia_token(&mut self, first_token: bool) {
self.current = match self.next.take() {
Some(next) => next,
None => self.next_non_trivia_token(first_token),
#[inline(always)]
fn lookahead(&mut self, n: usize) -> Option<Lookahead> {
assert_ne!(n, 0);

// Return the cached token if any
if let Some(lookahead) = self.non_trivia_lookahead.get(n - 1) {
return Some(*lookahead);
}
}

pub fn lookahead(&mut self) -> GraphqlSyntaxKind {
match self.next.as_ref() {
Some(next) => next.kind,
None if self.current.kind != EOF => {
let next_token = self.next_non_trivia_token(false);
let next_kind = next_token.kind;
self.next = Some(next_token);
next_kind
// Jump right to where we've left of last time rather than going through all tokens again.
let iter = self.lexer.lookahead().skip(self.lookahead_offset);
let mut remaining = n - self.non_trivia_lookahead.len();

for item in iter {
self.lookahead_offset += 1;

if !item.kind().is_trivia() {
remaining -= 1;

let lookahead = Lookahead {
after_newline: item.has_preceding_line_break(),
kind: item.kind(),
};

self.non_trivia_lookahead.push_back(lookahead);

if remaining == 0 {
return Some(lookahead);
}
}
None => EOF,
}
}

// We mostly look ahead by one token
// so there is no need to implement NthToken to use nth_at
pub fn lookahead_at(&mut self, kind: GraphqlSyntaxKind) -> bool {
self.lookahead() == kind
None
}

#[must_use]
fn next_non_trivia_token(&mut self, first_token: bool) -> NonTriviaToken {
let mut non_trivia_token = NonTriviaToken::default();

fn next_non_trivia_token(&mut self, first_token: bool) {
let mut processed_tokens = 0;
let mut trailing = !first_token;

// Drop the last cached lookahead, we're now moving past it
self.non_trivia_lookahead.pop_front();

loop {
let kind = self.lexer.next_token(());
processed_tokens += 1;

let trivia_kind = TriviaPieceKind::try_from(kind);

match trivia_kind {
Err(_) => {
// Not trivia
non_trivia_token.kind = kind;
non_trivia_token.range = self.lexer.current_range();
break;
}
Ok(trivia_kind) => {
if trivia_kind.is_newline() {
trailing = false;
non_trivia_token.preceding_line_break = true;
}

self.trivia.push(Trivia::new(
trivia_kind,
self.lexer.current_range(),
trailing,
));
self.trivia_list
.push(Trivia::new(trivia_kind, self.current_range(), trailing));
}
}
}

non_trivia_token
if self.lookahead_offset != 0 {
debug_assert!(self.lookahead_offset >= processed_tokens);
self.lookahead_offset -= processed_tokens;
}
}
}

impl<'source> TokenSource for GraphqlTokenSource<'source> {
type Kind = GraphqlSyntaxKind;

fn current(&self) -> Self::Kind {
self.current.kind
self.lexer.current()
}

fn current_range(&self) -> TextRange {
self.current.range
self.lexer.current_range()
}

fn text(&self) -> &str {
self.lexer.source()
}

fn has_preceding_line_break(&self) -> bool {
self.current.preceding_line_break
self.lexer.has_preceding_line_break()
}

fn bump(&mut self) {
if self.current.kind != EOF {
self.advance_to_next_non_trivia_token(false)
if self.current() != EOF {
self.next_non_trivia_token(false)
}
}

fn skip_as_trivia(&mut self) {
if self.current() != EOF {
self.trivia.push(Trivia::new(
self.trivia_list.push(Trivia::new(
TriviaPieceKind::Skipped,
self.current_range(),
false,
));

self.advance_to_next_non_trivia_token(false)
self.next_non_trivia_token(false)
}
}

fn finish(self) -> (Vec<Trivia>, Vec<ParseDiagnostic>) {
(self.trivia, self.lexer.finish())
(self.trivia_list, self.lexer.finish())
}
}

impl<'source> NthToken for GraphqlTokenSource<'source> {
    /// Kind of the nth non-trivia token, where `n == 0` is the current token.
    /// Positions past the end of input yield `EOF`.
    fn nth(&mut self, n: usize) -> GraphqlSyntaxKind {
        if n == 0 {
            return self.current();
        }
        match self.lookahead(n) {
            Some(lookahead) => lookahead.kind,
            None => EOF,
        }
    }

    /// Whether the nth non-trivia token is preceded by a line break;
    /// `n == 0` asks about the current token. Past end of input: `false`.
    #[inline(always)]
    fn has_nth_preceding_line_break(&mut self, n: usize) -> bool {
        if n == 0 {
            return self.has_preceding_line_break();
        }
        match self.lookahead(n) {
            Some(lookahead) => lookahead.after_newline,
            None => false,
        }
    }
}

0 comments on commit 1f891f5

Please sign in to comment.