Implement ANTLR v4 Rust runtime#1
Conversation
Copy/Paste DetectionFound 7 duplication(s) across 21 changed Rust file(s) (threshold: 100 tokens). Show duplicationsFound a 27 line (148 tokens) duplication in the following files:
decision_start_index,
precedence,
recovery_symbols: recovery_symbols.clone(),
recovery_state,
};
if let Some(outcomes) = memo.get(&key) {
return outcomes.clone();
}
let visit_key = key.clone();
if !visiting.insert(visit_key.clone()) {
return Vec::new();
}
let Some(state) = atn.state(state_number) else {
visiting.remove(&visit_key);
return Vec::new();
};
let next_decision_start_index = if starts_prediction_decision(state) {
Some(index)
} else {
decision_start_index
};
let (epsilon_recovery_symbols, epsilon_recovery_state) =
next_recovery_context(atn, state, &recovery_symbols, recovery_state);
let mut outcomes = Vec::new();
for transition in &state.transitions {Found a 19 line (120 tokens) duplication in the following files:
pub(crate) fn matching_action_brace(source: &str, mut index: usize) -> Option<usize> {
let mut nested = 0_usize;
let mut double_quoted = false;
let mut escaped = false;
while let Some(ch) = source[index..].chars().next() {
if escaped {
escaped = false;
index += ch.len_utf8();
continue;
}
match ch {
'\\' if double_quoted => escaped = true,
'"' => double_quoted = !double_quoted,
'\'' if !double_quoted => {
if let Some(next_index) = skip_char_literal(source, index) {
index = next_index;
continue;
}
}Found a 15 line (114 tokens) duplication in the following files:
fn parser_matches_token_and_reports_mismatch() {
let source = Source {
tokens: vec![
CommonToken::new(1).with_text("x"),
CommonToken::eof("parser-test", 1, 1, 1),
],
index: 0,
};
let data = RecognizerData::new(
"Mini.g4",
Vocabulary::new([None, Some("'x'")], [None, Some("X")], [None::<&str>, None]),
);
let mut parser = BaseParser::new(CommonTokenStream::new(source), data);
assert_eq!(
parser.match_token(1).expect("token 1 should match").text(),Found a 32 line (113 tokens) duplication in the following files:
outcomes.extend(
self.recognize_state(
atn,
RecognizeRequest {
state_number: *target,
stop_state,
index,
rule_start_index,
decision_start_index: next_decision_start_index,
init_action_rules,
predicates,
rule_args,
member_actions,
return_actions,
local_int_arg,
member_values: member_values.clone(),
return_values: return_values.clone(),
rule_alt_number: next_alt_number,
track_alt_numbers,
consumed_eof,
precedence,
depth: depth + 1,
recovery_symbols: epsilon_recovery_symbols.clone(),
recovery_state: epsilon_recovery_state,
},
visiting,
memo,
expected,
)
.into_iter()
.map(|mut outcome| {
prepend_decision(&mut outcome, decision);Found a 14 line (110 tokens) duplication in the following files:
fn outcome_ties_keep_later_non_recursive_alternative() {
let first = RecognizeOutcome {
index: 1,
consumed_eof: false,
alt_number: 0,
member_values: BTreeMap::new(),
return_values: BTreeMap::new(),
diagnostics: Vec::new(),
decisions: Vec::new(),
actions: vec![ParserAction::new(1, 0, 0, None)],
nodes: vec![RecognizedNode::Token { index: 0 }],
};
let second = RecognizeOutcome {
actions: vec![ParserAction::new(2, 0, 0, None)],Found a 13 line (109 tokens) duplication in the following files:
fn parser_matches_token_and_reports_mismatch() {
let source = Source {
tokens: vec![
CommonToken::new(1).with_text("x"),
CommonToken::eof("parser-test", 1, 1, 1),
],
index: 0,
};
let data = RecognizerData::new(
"Mini.g4",
Vocabulary::new([None, Some("'x'")], [None, Some("X")], [None::<&str>, None]),
);
let mut parser = BaseParser::new(CommonTokenStream::new(source), data);Found a 19 line (101 tokens) duplication in the following files:
let start_state = atn
.rule_to_start_state()
.get(rule_index)
.copied()
.ok_or_else(|| {
AntlrError::Unsupported(format!("rule {rule_index} has no start state"))
})?;
let stop_state = atn
.rule_to_stop_state()
.get(rule_index)
.copied()
.filter(|state| *state != usize::MAX)
.ok_or_else(|| {
AntlrError::Unsupported(format!("rule {rule_index} has no stop state"))
})?;
let start_index = self.current_visible_index();
self.clear_prediction_diagnostics();
let mut visiting = BTreeSet::new(); |
|
Important Review skippedReview was skipped as selected files did not have any reviewable changes. 💤 Files selected but had no reviewable changes (1)
⚙️ Run configurationConfiguration used: defaults Review profile: CHILL Plan: Pro Run ID: 📒 Files selected for processing (1)
You can disable this status message by setting the Use the checkbox below for a quick retry:
📝 WalkthroughWalkthroughAdds a clean-room ANTLR v4 runtime in Rust (streams, tokens, ATN structures, deserializer, DFA/prediction, lexer runtime, parse-tree), generator/tooling support, a testsuite harness, docs, and CI workflows. ChangesCore Runtime
Code generation & tooling
Testsuite & CI
Estimated code review effort🎯 4 (Complex) | ⏱️ ~60 minutes
✨ Finishing Touches🧪 Generate unit tests (beta)
|
There was a problem hiding this comment.
Code Review
This pull request introduces a clean-room implementation of the ANTLR v4 runtime for Rust, including ATN structures, lexer/parser simulators, and a test harness for the upstream ANTLR runtime-testsuite. The implementation focuses on metadata-driven generation and simulation. My review identified several performance and architectural concerns, including inefficient linear lookups in DFA and configuration sets, potential memory allocations in the ATN deserializer, and circular dependencies between modules. Additionally, I flagged a likely typo in the rust-version field and requested clarifications on specific thresholds and stack size configurations used in the test harness.
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 42bd95e3ff
ℹ️ About Codex in GitHub
Your team has set up Codex to review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
Codex can also answer questions or update the PR. Try commenting "@codex address that feedback".
There was a problem hiding this comment.
Actionable comments posted: 8
🧹 Nitpick comments (3)
src/tree.rs (1)
21-21: ⚡ Quick winPrefer
&[&str](orimpl AsRef<str>) forrule_namesin public APIs.Using
&[String]forces avoidable allocations at callsites (especially generated/static rule-name tables). Switching to borrowed string slices improves ergonomics and avoidsto_owned()churn.♻️ Proposed API shape update
- pub fn to_string_tree(&self, rule_names: &[String]) -> String + pub fn to_string_tree(&self, rule_names: &[&str]) -> String - pub fn rule_invocation_stack(&self, rule_index: usize, rule_names: &[String]) -> Option<Vec<String>> + pub fn rule_invocation_stack(&self, rule_index: usize, rule_names: &[&str]) -> Option<Vec<String>> - fn find_rule_path(&self, rule_index: usize, rule_names: &[String], stack: &mut Vec<String>) -> bool + fn find_rule_path(&self, rule_index: usize, rule_names: &[&str], stack: &mut Vec<String>) -> bool - pub fn to_string_tree(&self, rule_names: &[String]) -> String + pub fn to_string_tree(&self, rule_names: &[&str]) -> StringAlso applies to: 90-94, 103-107, 168-169, 269-270
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the rest with a brief reason, keep changes minimal, and validate. In `@src/tree.rs` at line 21, Change the public APIs that currently take rule_names: &[String] (e.g., the method to_string_tree and any other methods that accept rule_names as &[String]) to accept borrowed strings instead—either &[&str] or a generic impl AsRef<str>—so callers can pass static or borrowed names without allocating; update the function signatures accordingly, adjust any internal uses (replace owned String indexing/borrows with &str or call .as_ref()), and update all callsites to pass string slices (or let implicit AsRef conversions handle them) to avoid unnecessary to_owned() allocations.src/bin/antlr4-runtime-testsuite.rs (2)
1498-1518: 💤 Low valueConsider extracting smoke crate templates for improved readability.
The inline format strings in
smoke_mainandparser_smoke_mainare quite long (400-800+ characters), making them difficult to read and modify. Consider extracting these into separate template constants or usingindocmacro for multi-line string literals.♻️ Example approach using constants
const LEXER_SMOKE_TEMPLATE: &str = r#"pub mod generated {{ pub mod {module_name}; }} use antlr4_runtime::{{CommonTokenStream, InputStream{token_source_import}}}; use generated::{module_name}::{type_name}; fn main() {{ let lexer = {type_name}::new(InputStream::new("{input}")); let mut tokens = CommonTokenStream::new(lexer); tokens.fill(); for error in tokens.drain_source_errors() {{ eprintln!("line {{}}:{{}} {{}}", error.line, error.column, error.message); }} for token in tokens.tokens() {{ println!("{{token}}"); }} {dfa_dump}}} "#;This improves readability and makes future template modifications easier to reason about.
Also applies to: 1520-1564
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the rest with a brief reason, keep changes minimal, and validate. In `@src/bin/antlr4-runtime-testsuite.rs` around lines 1498 - 1518, The long inline format strings in smoke_main (and similarly in parser_smoke_main) should be extracted into readable template constants (e.g., LEXER_SMOKE_TEMPLATE / PARSER_SMOKE_TEMPLATE) or use indoc! for a multi-line literal; keep placeholders for {module_name}, {type_name}, {input}, {dfa_dump}, {token_source_import} and then call format!(...) or perform placeholder replacement in smoke_main and parser_smoke_main so the functions construct the final string from the template constants rather than embedding the huge literal inline.
1442-1444: ⚡ Quick winConsider adding timeout protection for subprocess execution.
The
run_outputwrapper callsCommand::output()which blocks indefinitely. If the ANTLR tool, Rust generator, or smoke crate execution hangs (e.g., infinite loop in generated code), the entire test harness will hang without useful diagnostics.For CI robustness, consider using a timeout mechanism:
♻️ Suggested approach using wait_timeout crate or manual implementation
+use std::time::Duration; + +const SUBPROCESS_TIMEOUT: Duration = Duration::from_secs(60); + fn run_output(command: &mut Command) -> io::Result<Output> { - command.output() + let mut child = command.spawn()?; + let start = std::time::Instant::now(); + loop { + match child.try_wait()? { + Some(status) => { + let stdout = child.stdout.take().map(|mut s| { + let mut buf = Vec::new(); + std::io::Read::read_to_end(&mut s, &mut buf).ok(); + buf + }).unwrap_or_default(); + let stderr = child.stderr.take().map(|mut s| { + let mut buf = Vec::new(); + std::io::Read::read_to_end(&mut s, &mut buf).ok(); + buf + }).unwrap_or_default(); + return Ok(Output { status, stdout, stderr }); + } + None if start.elapsed() > SUBPROCESS_TIMEOUT => { + let _ = child.kill(); + return Err(io::Error::new( + io::ErrorKind::TimedOut, + "subprocess timed out", + )); + } + None => std::thread::sleep(Duration::from_millis(100)), + } + } }Note: This simplified approach requires spawning with
Stdio::piped()for stdout/stderr capture. Alternatively, thewait-timeoutcrate provides a cleaner API.🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the rest with a brief reason, keep changes minimal, and validate. In `@src/bin/antlr4-runtime-testsuite.rs` around lines 1442 - 1444, The helper run_output currently calls Command::output() and can block indefinitely; update run_output to enforce a timeout by spawning the process (use Command::spawn with stdout/stderr piped), wait with a timeout (use the wait-timeout crate or implement a manual loop checking child.try_wait with a sleep), and if the timeout expires kill/terminate the child and return an io::Error (or a custom Err variant) describing the timeout and include partial stdout/stderr if available; reference the run_output function and replace the direct call to Command::output() with this spawn/wait/kill flow so CI won't hang on stuck subprocesses.
🤖 Prompt for all review comments with AI agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.
Inline comments:
In @.github/workflows/antlr-runtime-testsuite.yml:
- Around line 26-27: The workflow uses tag-based third-party action refs and
leaves checkout credentials persisted; update the uses: actions/checkout@v4
entry to a fixed commit SHA and add persist-credentials: false under that step,
and similarly replace any other tag-based uses: ... action refs (e.g., the other
third-party action referenced in the job) with their pinned commit SHAs so both
actions are pinned to specific commits rather than tags.
In @.github/workflows/cpd.yml:
- Around line 31-35: The workflow currently uses unpinned action tags
(actions/checkout@v5 and actions/setup-java@v4) and leaves Git credentials
persisted; update both action usages to pinned commit SHAs (replace `@v5` and `@v4`
with the specific commit SHA for each action) to remove supply-chain risk, and
add persist-credentials: false under the actions/checkout invocation (since
subsequent steps don’t need Git creds) to disable credential persistence; ensure
you update both occurrences referenced in the diff (the checkout action and the
setup-java invocation) to their respective SHAs.
In @.github/workflows/publish.yml:
- Around line 22-23: Replace floating action refs with full-length commit SHAs
and disable persistent credentials: change the uses: entry for actions/checkout
(currently actions/checkout@v4) to uses: actions/checkout@<full-commit-sha> and
add a with: block setting persist-credentials: false; do the same pinning for
the other floating action ref referenced in the workflow (the uses: entry around
line 35) by replacing its tag with its full commit SHA so both actions are
pinned to immutable SHAs.
In `@docs/kotlin-build.md`:
- Line 65: The docs use two different entry-rule names—`kotlinFile` and
`parser.kotlin_file()`—which is inconsistent; update the wording so both
references use the same form (prefer matching the Rust API example by replacing
`kotlinFile` with `parser.kotlin_file()`), and ensure any surrounding example
text and mentions consistently reference `parser.kotlin_file()` instead of
`kotlinFile`.
In `@src/bin_support/templates.rs`:
- Around line 161-209: Both matching_action_brace and matching_template_close
only track double-quoted strings and thus mis-handle single-quoted char literals
like '\''{'\''}'; update both functions (matching_action_brace and
matching_template_close) to also track single-quote state: add a separate
quoted_char (or extend quoted) boolean to toggle when encountering '\'' when not
already in a double-quote, treat backslash escapes inside single quotes the same
as inside double quotes (i.e., set escaped = true and skip the next char), and
ignore structural chars ('{','}','<','>') while inside a single-quoted literal
so nesting counters are not affected.
In `@src/prediction.rs`:
- Around line 66-98: The merge function's Empty-with-nonempty branch builds an
Array with parents [left, right] but uses EMPTY_RETURN_STATE for both
return_states, breaking the (parent, return_state) alignment invariant; update
merge (or the Empty case) to produce aligned pairs instead — either call
collect_entries on the non-empty context and prepend the empty context entry(s)
correctly, or explicitly extract the real return_state from a Singleton (and
from any non-empty) instead of using EMPTY_RETURN_STATE for both; reference the
merge function, the Array enum construction, EMPTY_RETURN_STATE, and
collect_entries to locate and fix the logic so parents[i] aligns with
return_states[i].
In `@src/token_stream.rs`:
- Around line 52-71: lt(1) can return a hidden token because when offset==1 the
loop that applies channel filtering doesn't run; fix by initializing the forward
search using next_token_on_channel instead of taking self.cursor raw.
Specifically, in the positive-offset branch of lt(), set let mut index =
self.next_token_on_channel(self.cursor, self.channel) (or call
next_token_on_channel(self.cursor + 1, ...) if the helper expects the next
index) and then run the remaining-1 iterations with index =
self.next_token_on_channel(index + 1, self.channel), then call self.sync(index)
and return self.tokens.get(index); leave the negative/ lb branch unchanged. This
ensures lt, next_token_on_channel, sync, tokens, cursor and channel behave
correctly for offset==1.
In `@tool/src/org/antlr/v4/codegen/target/RustTarget.java`:
- Around line 24-33: The RUST_KEYWORDS array is missing the reserved word
"union", which allows problematic grammar symbols to slip past
visibleGrammarSymbolCausesIssueInGeneratedCode(); update the RUST_KEYWORDS
initializer to include "union" (add "union" among the string entries) and also
apply the same addition to any duplicate keyword lists referenced elsewhere in
this file (the other keyword array around the same area) so all checks use the
complete Rust reserved-word set.
---
Nitpick comments:
In `@src/bin/antlr4-runtime-testsuite.rs`:
- Around line 1498-1518: The long inline format strings in smoke_main (and
similarly in parser_smoke_main) should be extracted into readable template
constants (e.g., LEXER_SMOKE_TEMPLATE / PARSER_SMOKE_TEMPLATE) or use indoc! for
a multi-line literal; keep placeholders for {module_name}, {type_name}, {input},
{dfa_dump}, {token_source_import} and then call format!(...) or perform
placeholder replacement in smoke_main and parser_smoke_main so the functions
construct the final string from the template constants rather than embedding the
huge literal inline.
- Around line 1442-1444: The helper run_output currently calls Command::output()
and can block indefinitely; update run_output to enforce a timeout by spawning
the process (use Command::spawn with stdout/stderr piped), wait with a timeout
(use the wait-timeout crate or implement a manual loop checking child.try_wait
with a sleep), and if the timeout expires kill/terminate the child and return an
io::Error (or a custom Err variant) describing the timeout and include partial
stdout/stderr if available; reference the run_output function and replace the
direct call to Command::output() with this spawn/wait/kill flow so CI won't hang
on stuck subprocesses.
In `@src/tree.rs`:
- Line 21: Change the public APIs that currently take rule_names: &[String]
(e.g., the method to_string_tree and any other methods that accept rule_names as
&[String]) to accept borrowed strings instead—either &[&str] or a generic impl
AsRef<str>—so callers can pass static or borrowed names without allocating;
update the function signatures accordingly, adjust any internal uses (replace
owned String indexing/borrows with &str or call .as_ref()), and update all
callsites to pass string slices (or let implicit AsRef conversions handle them)
to avoid unnecessary to_owned() allocations.
🪄 Autofix (Beta)
Fix all unresolved CodeRabbit comments on this PR:
- Push a commit to this branch (recommended)
- Create a new PR with the fixes
ℹ️ Review info
⚙️ Run configuration
Configuration used: defaults
Review profile: CHILL
Plan: Pro
Run ID: b240277e-b3ae-4e02-b43a-2cd95afb6dc2
⛔ Files ignored due to path filters (1)
Cargo.lockis excluded by!**/*.lock
📒 Files selected for processing (36)
.clippy.toml.github/workflows/antlr-runtime-testsuite.yml.github/workflows/ci.yml.github/workflows/cpd.yml.github/workflows/publish.yml.gitignoreCargo.tomlLICENSEREADME.mddocs/kotlin-build.mddocs/requirements.mddocs/runtime-testsuite.mdsrc/atn/lexer.rssrc/atn/mod.rssrc/atn/serialized.rssrc/bin/antlr4-runtime-testsuite.rssrc/bin/antlr4-rust-gen.rssrc/bin_support/rust_names.rssrc/bin_support/templates.rssrc/char_stream.rssrc/dfa.rssrc/errors.rssrc/generated.rssrc/int_stream.rssrc/lexer.rssrc/lib.rssrc/parser.rssrc/prediction.rssrc/recognizer.rssrc/token.rssrc/token_stream.rssrc/tree.rssrc/vocabulary.rstool/README.mdtool/resources/org/antlr/v4/tool/templates/codegen/Rust/Rust.stgtool/src/org/antlr/v4/codegen/target/RustTarget.java
|
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 1a98bd3bbc
ℹ️ About Codex in GitHub
Your team has set up Codex to review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
Codex can also answer questions or update the PR. Try commenting "@codex address that feedback".
There was a problem hiding this comment.
Actionable comments posted: 2
Caution
Some comments are outside the diff and can’t be posted inline due to platform limitations.
⚠️ Outside diff range comments (1)
src/prediction.rs (1)
116-122:⚠️ Potential issue | 🟠 Major | ⚡ Quick winUse a key type for
(state, alt, context)uniqueness instead of derivingEq/Ordon the fullAtnConfig.
AtnConfigcurrently derivesEq, Ord, soBTreeSet<AtnConfig>includesreaches_into_outer_contextin membership checks. This contradicts the documented behavior at line 149 that claims uniqueness is based only on(state, alt, context). Two configs with identical(state, alt, context)but differentreaches_into_outer_contextvalues can both be inserted, violating the documented contract. Thereaches_into_outer_contextfield is semantically separate (used only to track thedips_into_outer_contextflag) and should not affect config identity.🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the rest with a brief reason, keep changes minimal, and validate. In `@src/prediction.rs` around lines 116 - 122, AtnConfig currently derives Eq/Ord which incorrectly includes reaches_into_outer_context in identity checks; change identity to only (state, alt, context) by introducing a separate key type (e.g., AtnConfigKey or ConfigKey) that contains state: usize, alt: usize, context: Rc<PredictionContext> and implements Eq/Ord/Hash based solely on those fields, then use BTreeSet<ConfigKey> (or store ConfigKey alongside AtnConfig in sets/maps) for membership/uniqueness operations instead of BTreeSet<AtnConfig>, leaving reaches_into_outer_context on AtnConfig solely for tracking dips_into_outer_context. Ensure all places that rely on set membership or ordering (search for usages of AtnConfig in set/map operations) are updated to use the new key type.
🤖 Prompt for all review comments with AI agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.
Inline comments:
In `@src/bin_support/templates.rs`:
- Around line 165-178: The loop currently treats every single-quote as toggling
single_quoted, which misclassifies Rust lifetimes like &'a and <'input> as
quoted; change the '\'' match to peek the following char(s) and only toggle
single_quoted when the pattern is a real char literal (e.g., opening '\''
followed by either a backslash or a non-identifier char and then a closing '\'')
— if the next char is an identifier-start (ASCII letter or '_') treat it as a
lifetime (do not set single_quoted and do not swallow subsequent delimiters).
Update the same logic at the other occurrences you noted (around the other
blocks) and add unit tests that assert &'a str and <'input> are NOT treated as
quoted while existing char-literal tests still pass.
In `@src/prediction.rs`:
- Around line 76-81: The current dedup logic sorts entries only by return_state
then calls entries.dedup_by, which can miss duplicates that are non-adjacent
when parents differ in ordering; change the sort to use both return_state and
parent (e.g., sort by key (return_state, parent)) before calling
entries.dedup_by or replace the whole approach by building a HashSet keyed by
(parent, return_state) while collecting; update the usage around
collect_entries, entries.sort_by_key, and entries.dedup_by accordingly so
duplicates are detected using both fields.
---
Outside diff comments:
In `@src/prediction.rs`:
- Around line 116-122: AtnConfig currently derives Eq/Ord which incorrectly
includes reaches_into_outer_context in identity checks; change identity to only
(state, alt, context) by introducing a separate key type (e.g., AtnConfigKey or
ConfigKey) that contains state: usize, alt: usize, context:
Rc<PredictionContext> and implements Eq/Ord/Hash based solely on those fields,
then use BTreeSet<ConfigKey> (or store ConfigKey alongside AtnConfig in
sets/maps) for membership/uniqueness operations instead of BTreeSet<AtnConfig>,
leaving reaches_into_outer_context on AtnConfig solely for tracking
dips_into_outer_context. Ensure all places that rely on set membership or
ordering (search for usages of AtnConfig in set/map operations) are updated to
use the new key type.
🪄 Autofix (Beta)
Fix all unresolved CodeRabbit comments on this PR:
- Push a commit to this branch (recommended)
- Create a new PR with the fixes
ℹ️ Review info
⚙️ Run configuration
Configuration used: defaults
Review profile: CHILL
Plan: Pro
Run ID: 0cd33e19-2f2f-44e8-a9dc-433bce954a69
📒 Files selected for processing (10)
.github/workflows/antlr-runtime-testsuite.yml.github/workflows/ci.yml.github/workflows/cpd.yml.github/workflows/publish.ymldocs/kotlin-build.mdsrc/bin_support/templates.rssrc/parser.rssrc/prediction.rssrc/token_stream.rstool/src/org/antlr/v4/codegen/target/RustTarget.java
✅ Files skipped from review due to trivial changes (1)
- docs/kotlin-build.md
There was a problem hiding this comment.
🧹 Nitpick comments (1)
src/prediction.rs (1)
233-246: ⚡ Quick winStrengthen dedup regression assertions beyond length.
assert_eq!(merged.len(), 2)can pass even if the wrong duplicate is retained. Please assert the expected(parent, return_state)pairs too.✅ Suggested test hardening
#[test] fn merge_deduplicates_entries_with_same_parent_and_return_state() { let empty = PredictionContext::empty(); let parent_one = PredictionContext::singleton(Rc::clone(&empty), 1); let parent_two = PredictionContext::singleton(Rc::clone(&empty), 2); let left = Rc::new(PredictionContext::Array { - parents: vec![Rc::clone(&parent_one), parent_two], + parents: vec![Rc::clone(&parent_one), Rc::clone(&parent_two)], return_states: vec![42, 42], }); let right = PredictionContext::singleton(Rc::clone(&parent_one), 42); let merged = PredictionContext::merge(left, right); assert_eq!(merged.len(), 2); + let pairs = [ + (merged.parent(0), merged.return_state(0)), + (merged.parent(1), merged.return_state(1)), + ]; + assert!(pairs.contains(&(Some(Rc::clone(&parent_one)), Some(42)))); + assert!(pairs.contains(&(Some(parent_two), Some(42)))); }🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the rest with a brief reason, keep changes minimal, and validate. In `@src/prediction.rs` around lines 233 - 246, The test merge_deduplicates_entries_with_same_parent_and_return_state currently only asserts merged.len() == 2 which can hide which duplicate was retained; update the test to explicitly assert the expected (parent, return_state) pairs after calling PredictionContext::merge(left, right). Locate the test (function merge_deduplicates_entries_with_same_parent_and_return_state) and add assertions that inspect the merged PredictionContext entries (parents and return_states fields or through public accessors on PredictionContext) to verify that the retained entries are the correct parent Rc pointers (e.g., parent_one and parent_two via Rc::ptr_eq or equivalent) and their associated return state values (42 and the other expected value), rather than relying solely on length.
🤖 Prompt for all review comments with AI agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.
Nitpick comments:
In `@src/prediction.rs`:
- Around line 233-246: The test
merge_deduplicates_entries_with_same_parent_and_return_state currently only
asserts merged.len() == 2 which can hide which duplicate was retained; update
the test to explicitly assert the expected (parent, return_state) pairs after
calling PredictionContext::merge(left, right). Locate the test (function
merge_deduplicates_entries_with_same_parent_and_return_state) and add assertions
that inspect the merged PredictionContext entries (parents and return_states
fields or through public accessors on PredictionContext) to verify that the
retained entries are the correct parent Rc pointers (e.g., parent_one and
parent_two via Rc::ptr_eq or equivalent) and their associated return state
values (42 and the other expected value), rather than relying solely on length.
ℹ️ Review info
⚙️ Run configuration
Configuration used: defaults
Review profile: CHILL
Plan: Pro
Run ID: cd586c51-10a0-4c39-b9f9-a5230857a03a
📒 Files selected for processing (3)
src/bin_support/templates.rssrc/parser.rssrc/prediction.rs
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: c128f43017
ℹ️ About Codex in GitHub
Your team has set up Codex to review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
Codex can also answer questions or update the PR. Try commenting "@codex address that feedback".
Summary
antlr4-rust-genfor generating Rust modules from ANTLR.interpmetadata and anantlr4-runtime-testsuiteharness for upstream ANTLR runtime descriptor conformance.Validation
cargo fmt --checkcargo clippy --locked --all-targets --all-features -- -D warningscargo test --locked357 passed, 0 failed, 0 skipped, 357 runNotes
PMD CPD currently returns recoverable status
5because PMD's Rust lexer chokes on the readable'<EOF>'token-display assertion insrc/token.rs, but it still emits the duplication report. The workflow treats status5as recoverable for that reason.Summary by CodeRabbit
Release Notes
New Features
Documentation
Tests
Chores