Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
3a11745
perf(parser): incremental wins on recursive recognizer hot path
tinovyatkin May 24, 2026
c310dd2
internal: add PredictionFxHasher for hot-path FxHash maps
tinovyatkin May 25, 2026
d1cc835
perf(lexer): Rc-wrap cached DFA state to avoid Vec<ConfigKey> clone
tinovyatkin May 24, 2026
0d99ed8
perf(lexer): switch closure-state seen set from BTreeSet to FxHashSet
tinovyatkin May 24, 2026
a16fab4
perf(lexer): swap LexerDfaTrace BTreeMap fields for FxHashMap
tinovyatkin May 24, 2026
6014aad
perf-counters: track outcome list length distribution
tinovyatkin May 24, 2026
8587fd7
perf(parser): inline single-atom-match transitions in fast loop
tinovyatkin May 24, 2026
15cbcca
perf(parser): skip diagnostics merge when child has no diagnostics
tinovyatkin May 24, 2026
74936d1
perf(parser): intern empty NodeList tail to avoid Rc allocation
tinovyatkin May 24, 2026
579e89e
perf(parser): per-instance decision lookahead cache
tinovyatkin May 24, 2026
c240e5c
perf(parser): LL(1) early-commit when FIRST sets are disjoint
tinovyatkin May 24, 2026
dcc2921
perf(parser): inline-scan dedupe instead of BTreeSet for small outcom…
tinovyatkin May 24, 2026
545ae19
perf(parser): cache LL(1) alt selection per (state, lookahead)
tinovyatkin May 24, 2026
e10bef2
perf(parser): inline single-node NodeList variant to skip Rc<NodeList…
tinovyatkin May 24, 2026
cfd5cd9
fix(parser): dedupe overflow when >8 distinct outcomes per visit
tinovyatkin May 25, 2026
ba515eb
review(prediction): use i32::cast_unsigned for FCP consistency
tinovyatkin May 25, 2026
e9d20c1
review(parser): clear ll1_decision_cache on parse reset
tinovyatkin May 25, 2026
a5b2f09
review(parser): use rule_first_set return value directly
tinovyatkin May 25, 2026
11b2139
review(parser): use exact (state, index) pair for cycle-guard
tinovyatkin May 25, 2026
c704581
review(parser): scope SHARED_ATN_CACHES key to grammar identity
tinovyatkin May 25, 2026
d5b2fe1
chore: improve CLAUDE.md
tinovyatkin May 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ path = "src/lib.rs"
[features]
default = []
std = []
perf-counters = []

[dependencies]
thiserror = "2"
Expand Down
15 changes: 10 additions & 5 deletions src/atn/lexer.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::collections::BTreeSet;
use std::collections::{BTreeSet, HashSet};
use std::hash::BuildHasherDefault;

use crate::atn::{Atn, AtnStateKind, LexerAction, Transition};
use crate::char_stream::{CharStream, TextInterval};
Expand All @@ -7,12 +8,16 @@ use crate::lexer::{
BaseLexer, Lexer, LexerCustomAction, LexerDfaActionKey, LexerDfaCachedAccept,
LexerDfaCachedState, LexerDfaCachedTransition, LexerDfaConfigKey, LexerDfaKey, LexerPredicate,
};
use crate::prediction::PredictionFxHasher;
use crate::token::{CommonToken, DEFAULT_CHANNEL, INVALID_TOKEN_TYPE, TokenFactory};

#[allow(clippy::disallowed_types)]
type FxHashSet<K> = HashSet<K, BuildHasherDefault<PredictionFxHasher>>;

const MIN_CHAR_VALUE: i32 = 0;
const MAX_CHAR_VALUE: i32 = 0x0010_FFFF;

#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
struct LexerConfig {
state: usize,
position: usize,
Expand All @@ -23,7 +28,7 @@ struct LexerConfig {
actions: Vec<LexerActionTrace>,
}

#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
struct LexerActionTrace {
action_index: usize,
position: usize,
Expand Down Expand Up @@ -100,7 +105,7 @@ impl LexerActionResult {
/// Accumulates one epsilon-closure expansion, including whether predicate
/// evaluation made the closure input-position-sensitive.
struct ClosureState {
seen: BTreeSet<LexerConfig>,
seen: FxHashSet<LexerConfig>,
closed: Vec<LexerConfig>,
has_semantic_context: bool,
}
Expand Down Expand Up @@ -723,7 +728,7 @@ where
P: FnMut(&BaseLexer<I, F>, LexerPredicate) -> bool,
{
let mut state = ClosureState {
seen: BTreeSet::new(),
seen: FxHashSet::default(),
closed: Vec::new(),
has_semantic_context: false,
};
Expand Down
16 changes: 13 additions & 3 deletions src/atn/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,9 +359,19 @@ impl IntervalSet {

/// Returns true when `value` falls inside any stored interval.
pub fn contains(&self, value: i32) -> bool {
self.ranges
.iter()
.any(|(start, stop)| (*start..=*stop).contains(&value))
// Ranges are kept sorted and coalesced by `normalize`, so the first
// range whose `start > value` cannot contain `value` and neither can
// any range after it. Binary searching for that boundary turns
// membership lookup from O(n) to O(log n), which matters because
// parser/lexer hot paths call this once per `Set`/`NotSet`/`Wildcard`
// transition probe.
match self
.ranges
.binary_search_by(|(start, _)| start.cmp(&value))
{
Ok(_) => true,
Err(pos) => pos > 0 && self.ranges[pos - 1].1 >= value,
}
}

pub fn ranges(&self) -> &[(i32, i32)] {
Expand Down
44 changes: 25 additions & 19 deletions src/lexer.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
use std::collections::{BTreeMap, BTreeSet};
use std::collections::{BTreeSet, HashMap};
use std::hash::BuildHasherDefault;
use std::rc::Rc;

use crate::char_stream::{CharStream, TextInterval};
use crate::int_stream::EOF;
use crate::prediction::PredictionFxHasher;
use crate::recognizer::{Recognizer, RecognizerData};
use crate::token::{CommonToken, CommonTokenFactory, TokenFactory, TokenSourceError, TokenSpec};

#[allow(clippy::disallowed_types)]
type FxHashMap<K, V> = HashMap<K, V, BuildHasherDefault<PredictionFxHasher>>;

pub const SKIP: i32 = -3;
pub const MORE: i32 = -2;
pub const DEFAULT_MODE: i32 = 0;
Expand Down Expand Up @@ -113,29 +119,29 @@ pub struct BaseLexer<I, F = CommonTokenFactory> {
/// runtime-suite descriptors.
#[derive(Clone, Debug, Default)]
struct LexerDfaTrace {
state_numbers: BTreeMap<LexerDfaKey, usize>,
accept_predictions: BTreeMap<usize, i32>,
state_numbers: FxHashMap<LexerDfaKey, usize>,
accept_predictions: FxHashMap<usize, i32>,
edges: BTreeSet<LexerDfaEdge>,
cached_states: BTreeMap<usize, LexerDfaCachedState>,
transitions: BTreeMap<(usize, i32), LexerDfaCachedTransition>,
mode_starts: BTreeMap<i32, usize>,
cached_states: FxHashMap<usize, Rc<LexerDfaCachedState>>,
transitions: FxHashMap<(usize, i32), LexerDfaCachedTransition>,
mode_starts: FxHashMap<i32, usize>,
Comment thread
tinovyatkin marked this conversation as resolved.
}

impl LexerDfaTrace {
const fn new() -> Self {
fn new() -> Self {
Self {
state_numbers: BTreeMap::new(),
accept_predictions: BTreeMap::new(),
state_numbers: FxHashMap::default(),
accept_predictions: FxHashMap::default(),
edges: BTreeSet::new(),
cached_states: BTreeMap::new(),
transitions: BTreeMap::new(),
mode_starts: BTreeMap::new(),
cached_states: FxHashMap::default(),
transitions: FxHashMap::default(),
mode_starts: FxHashMap::default(),
}
}
}

/// Normalized lexer ATN config-set identity used for observed DFA traces.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub(crate) struct LexerDfaKey {
configs: Vec<LexerDfaConfigKey>,
}
Expand All @@ -148,7 +154,7 @@ impl LexerDfaKey {
}

/// One lexer ATN config identity with the absolute input position removed.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub(crate) struct LexerDfaConfigKey {
pub(crate) state: usize,
pub(crate) alt_rule_index: Option<usize>,
Expand All @@ -158,7 +164,7 @@ pub(crate) struct LexerDfaConfigKey {
pub(crate) actions: Vec<LexerDfaActionKey>,
}

#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub(crate) struct LexerDfaActionKey {
pub(crate) action_index: usize,
pub(crate) position_delta: usize,
Expand Down Expand Up @@ -220,7 +226,7 @@ where
I: CharStream,
{
/// Creates a lexer base using `CommonTokenFactory`.
pub const fn new(input: I, data: RecognizerData) -> Self {
pub fn new(input: I, data: RecognizerData) -> Self {
Self::with_factory(input, data, CommonTokenFactory)
}
}
Expand All @@ -231,7 +237,7 @@ where
F: TokenFactory,
{
/// Creates a lexer base with a custom token factory.
pub const fn with_factory(input: I, data: RecognizerData, factory: F) -> Self {
pub fn with_factory(input: I, data: RecognizerData, factory: F) -> Self {
Self {
input,
data,
Expand Down Expand Up @@ -530,7 +536,7 @@ where
.or_insert(transition);
}

pub(crate) fn cached_lexer_dfa_state(&self, state: usize) -> Option<LexerDfaCachedState> {
pub(crate) fn cached_lexer_dfa_state(&self, state: usize) -> Option<Rc<LexerDfaCachedState>> {
self.lexer_dfa.cached_states.get(&state).cloned()
}

Expand All @@ -542,7 +548,7 @@ where
self.lexer_dfa
.cached_states
.entry(state)
.or_insert(cached_state);
.or_insert_with(|| Rc::new(cached_state));
}

pub(crate) fn cached_lexer_mode_start(&self, mode: i32) -> Option<usize> {
Expand Down
Loading
Loading