From ac8854f6a2077bcc01e7210b515634573d54949b Mon Sep 17 00:00:00 2001 From: "Jim.Idle" Date: Wed, 31 Aug 2022 14:44:19 +0800 Subject: [PATCH 1/3] feat: #3840 Move Go to version v4.11.0 Signed-off-by: Jim.Idle --- runtime/Go/antlr/go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/Go/antlr/go.mod b/runtime/Go/antlr/go.mod index 1646d8b13e..0d8feb6214 100644 --- a/runtime/Go/antlr/go.mod +++ b/runtime/Go/antlr/go.mod @@ -1,4 +1,4 @@ -module github.com/antlr/antlr4/runtime/Go/antlr +module github.com/antlr/antlr4/runtime/Go/antlr/v4 go 1.18 From 69fdc2f84f16c5a8d8cbc82aa7ccf7de5806898d Mon Sep 17 00:00:00 2001 From: "Jim.Idle" Date: Wed, 31 Aug 2022 15:18:27 +0800 Subject: [PATCH 2/3] feat: #3840 Create the v4 version of the Go runtime Signed-off-by: Jim.Idle --- runtime/Go/antlr/go.mod | 2 +- runtime/Go/antlr/v4/LICENSE | 26 + runtime/Go/antlr/v4/antlrdoc.go | 68 + runtime/Go/antlr/v4/atn.go | 176 ++ runtime/Go/antlr/v4/atn_config.go | 303 ++++ runtime/Go/antlr/v4/atn_config_set.go | 439 +++++ .../antlr/v4/atn_deserialization_options.go | 61 + runtime/Go/antlr/v4/atn_deserializer.go | 683 ++++++++ runtime/Go/antlr/v4/atn_simulator.go | 50 + runtime/Go/antlr/v4/atn_state.go | 393 +++++ runtime/Go/antlr/v4/atn_type.go | 11 + runtime/Go/antlr/v4/atnconfigset_test.go | 73 + runtime/Go/antlr/v4/char_stream.go | 12 + runtime/Go/antlr/v4/common_token_factory.go | 56 + runtime/Go/antlr/v4/common_token_stream.go | 449 +++++ .../Go/antlr/v4/common_token_stream_test.go | 178 ++ runtime/Go/antlr/v4/comparators.go | 137 ++ runtime/Go/antlr/v4/dfa.go | 148 ++ runtime/Go/antlr/v4/dfa_serializer.go | 158 ++ runtime/Go/antlr/v4/dfa_state.go | 169 ++ .../Go/antlr/v4/diagnostic_error_listener.go | 109 ++ runtime/Go/antlr/v4/error_listener.go | 104 ++ runtime/Go/antlr/v4/error_strategy.go | 734 ++++++++ runtime/Go/antlr/v4/errors.go | 238 +++ runtime/Go/antlr/v4/file_stream.go | 49 + runtime/Go/antlr/v4/go.mod | 5 + runtime/Go/antlr/v4/input_stream.go | 113 ++ runtime/Go/antlr/v4/int_stream.go | 16 + runtime/Go/antlr/v4/interval_set.go | 312 ++++ runtime/Go/antlr/v4/jcollect.go | 195 +++ runtime/Go/antlr/v4/jcollect_test.go | 15 + runtime/Go/antlr/v4/lexer.go | 416 +++++ runtime/Go/antlr/v4/lexer_action.go | 432 +++++ runtime/Go/antlr/v4/lexer_action_executor.go | 186 ++ runtime/Go/antlr/v4/lexer_atn_simulator.go | 684 ++++++++ runtime/Go/antlr/v4/ll1_analyzer.go | 216 +++ runtime/Go/antlr/v4/parser.go | 708 ++++++++ runtime/Go/antlr/v4/parser_atn_simulator.go | 1549 +++++++++++++++++ runtime/Go/antlr/v4/parser_rule_context.go | 362 ++++ runtime/Go/antlr/v4/prediction_context.go | 764 ++++++++ runtime/Go/antlr/v4/prediction_mode.go | 529 ++++++ runtime/Go/antlr/v4/recognizer.go | 216 +++ runtime/Go/antlr/v4/rule_context.go | 114 ++ runtime/Go/antlr/v4/semantic_context.go | 469 +++++ runtime/Go/antlr/v4/testing_assert_test.go | 98 ++ runtime/Go/antlr/v4/testing_lexer_b_test.go | 137 ++ runtime/Go/antlr/v4/testing_util_test.go | 30 + runtime/Go/antlr/v4/token.go | 209 +++ runtime/Go/antlr/v4/token_source.go | 17 + runtime/Go/antlr/v4/token_stream.go | 20 + runtime/Go/antlr/v4/tokenstream_rewriter.go | 659 +++++++ .../Go/antlr/v4/tokenstream_rewriter_test.go | 417 +++++ runtime/Go/antlr/v4/trace_listener.go | 32 + runtime/Go/antlr/v4/transition.go | 428 +++++ runtime/Go/antlr/v4/tree.go | 253 +++ runtime/Go/antlr/v4/trees.go | 138 ++ runtime/Go/antlr/v4/utils.go | 352 ++++ runtime/Go/antlr/v4/utils_set.go | 235 +++ runtime/Go/antlr/v4/utils_test.go | 62 + 59 files changed, 15213 insertions(+), 1 deletion(-) create mode 100644 runtime/Go/antlr/v4/LICENSE create mode 100644 runtime/Go/antlr/v4/antlrdoc.go create mode 100644 runtime/Go/antlr/v4/atn.go create mode 100644 runtime/Go/antlr/v4/atn_config.go create mode 100644 runtime/Go/antlr/v4/atn_config_set.go create mode 100644 runtime/Go/antlr/v4/atn_deserialization_options.go create mode 100644 runtime/Go/antlr/v4/atn_deserializer.go create mode 100644 runtime/Go/antlr/v4/atn_simulator.go create mode 100644 runtime/Go/antlr/v4/atn_state.go create mode 100644 runtime/Go/antlr/v4/atn_type.go create mode 100644 runtime/Go/antlr/v4/atnconfigset_test.go create mode 100644 runtime/Go/antlr/v4/char_stream.go create mode 100644 runtime/Go/antlr/v4/common_token_factory.go create mode 100644 runtime/Go/antlr/v4/common_token_stream.go create mode 100644 runtime/Go/antlr/v4/common_token_stream_test.go create mode 100644 runtime/Go/antlr/v4/comparators.go create mode 100644 runtime/Go/antlr/v4/dfa.go create mode 100644 runtime/Go/antlr/v4/dfa_serializer.go create mode 100644 runtime/Go/antlr/v4/dfa_state.go create mode 100644 runtime/Go/antlr/v4/diagnostic_error_listener.go create mode 100644 runtime/Go/antlr/v4/error_listener.go create mode 100644 runtime/Go/antlr/v4/error_strategy.go create mode 100644 runtime/Go/antlr/v4/errors.go create mode 100644 runtime/Go/antlr/v4/file_stream.go create mode 100644 runtime/Go/antlr/v4/go.mod create mode 100644 runtime/Go/antlr/v4/input_stream.go create mode 100644 runtime/Go/antlr/v4/int_stream.go create mode 100644 runtime/Go/antlr/v4/interval_set.go create mode 100644 runtime/Go/antlr/v4/jcollect.go create mode 100644 runtime/Go/antlr/v4/jcollect_test.go create mode 100644 runtime/Go/antlr/v4/lexer.go create mode 100644 runtime/Go/antlr/v4/lexer_action.go create mode 100644 runtime/Go/antlr/v4/lexer_action_executor.go create mode 100644 runtime/Go/antlr/v4/lexer_atn_simulator.go create mode 100644 runtime/Go/antlr/v4/ll1_analyzer.go create mode 100644 runtime/Go/antlr/v4/parser.go create mode 100644 runtime/Go/antlr/v4/parser_atn_simulator.go create mode 100644 runtime/Go/antlr/v4/parser_rule_context.go create mode 100644 runtime/Go/antlr/v4/prediction_context.go create mode 100644 runtime/Go/antlr/v4/prediction_mode.go create mode 100644 runtime/Go/antlr/v4/recognizer.go create mode 100644 runtime/Go/antlr/v4/rule_context.go create mode 100644 runtime/Go/antlr/v4/semantic_context.go create mode 100644 runtime/Go/antlr/v4/testing_assert_test.go create mode 100644 runtime/Go/antlr/v4/testing_lexer_b_test.go create mode 100644 runtime/Go/antlr/v4/testing_util_test.go create mode 100644 runtime/Go/antlr/v4/token.go create mode 100644 runtime/Go/antlr/v4/token_source.go create mode 100644 runtime/Go/antlr/v4/token_stream.go create mode 100644 runtime/Go/antlr/v4/tokenstream_rewriter.go create mode 100644 runtime/Go/antlr/v4/tokenstream_rewriter_test.go create mode 100644 runtime/Go/antlr/v4/trace_listener.go create mode 100644 runtime/Go/antlr/v4/transition.go create mode 100644 runtime/Go/antlr/v4/tree.go create mode 100644 runtime/Go/antlr/v4/trees.go create mode 100644 runtime/Go/antlr/v4/utils.go create mode 100644 runtime/Go/antlr/v4/utils_set.go create mode 100644 runtime/Go/antlr/v4/utils_test.go diff --git a/runtime/Go/antlr/go.mod b/runtime/Go/antlr/go.mod index 0d8feb6214..1646d8b13e 100644 --- a/runtime/Go/antlr/go.mod +++ b/runtime/Go/antlr/go.mod @@ -1,4 +1,4 @@ -module github.com/antlr/antlr4/runtime/Go/antlr/v4 +module github.com/antlr/antlr4/runtime/Go/antlr go 1.18 diff --git a/runtime/Go/antlr/v4/LICENSE b/runtime/Go/antlr/v4/LICENSE new file mode 100644 index 0000000000..52cf18e425 --- /dev/null +++ b/runtime/Go/antlr/v4/LICENSE @@ -0,0 +1,26 @@ +Copyright 2021 The ANTLR Project + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/runtime/Go/antlr/v4/antlrdoc.go b/runtime/Go/antlr/v4/antlrdoc.go new file mode 100644 index 0000000000..4d7825f965 --- /dev/null +++ b/runtime/Go/antlr/v4/antlrdoc.go @@ -0,0 +1,68 @@ +/* +Package antlr implements the Go version of the ANTLR 4 runtime. + +# The ANTLR Tool + +ANTLR (ANother Tool for Language Recognition) is a powerful parser generator for reading, processing, executing, +or translating structured text or binary files. It's widely used to build languages, tools, and frameworks. +From a grammar, ANTLR generates a parser that can build parse trees and also generates a listener interface +(or visitor) that makes it easy to respond to the recognition of phrases of interest. + +# Code Generation + +ANTLR supports the generation of code in a number of [target languages], and the generated code is supported by a +runtime library, written specifically to support the generated code in the target language. This library is the +runtime for the Go target. + +To generate code for the go target, it is generally recommended to place the source grammar files in a package of +their own, and use the `.sh` script method of generating code, using the go generate directive. In that same directory +it is usual, though not required, to place the antlr tool that should be used to generate the code. That does mean +that the antlr tool JAR file will be checked in to your source code control though, so you are free to use any other +way of specifying the version of the ANTLR tool to use, such as aliasing in `.zshrc` or equivalent, or a profile in +your IDE, or configuration in your CI system. + +Here is a general template for an ANTLR based recognizer in Go: + + . + ├── myproject + ├── parser + │ ├── mygrammar.g4 + │ ├── antlr-4.11.0-complete.jar + │ ├── error_listeners.go + │ ├── generate.go + │ ├── generate.sh + ├── go.mod + ├── go.sum + ├── main.go + └── main_test.go + +Make sure that the package statement in your grammar file(s) reflects the go package they exist in. +The generate.go file then looks like this: + + package parser + + //go:generate ./generate.sh + +And the generate.sh file will look similar to this: + + #!/bin/sh + + alias antlr4='java -Xmx500M -cp "./antlr4-4.11.0-complete.jar:$CLASSPATH" org.antlr.v4.Tool' + antlr4 -Dlanguage=Go -no-visitor -package tgram *.g4 + +depending on whether you want visitors or listeners or any other ANTLR options. + +From the command line at the root of your package “myproject” you can then simply issue the command: + + go generate ./... + +# Copyright Notice + +Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. + +Use of this file is governed by the BSD 3-clause license, which can be found in the [LICENSE.txt] file in the project root. + +[target languages]: https://github.com/antlr/antlr4/tree/master/runtime +[LICENSE.txt]: https://github.com/antlr/antlr4/blob/master/LICENSE.txt +*/ +package antlr diff --git a/runtime/Go/antlr/v4/atn.go b/runtime/Go/antlr/v4/atn.go new file mode 100644 index 0000000000..98010d2e6e --- /dev/null +++ b/runtime/Go/antlr/v4/atn.go @@ -0,0 +1,176 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "sync" + +// ATNInvalidAltNumber is used to represent an ALT number that has yet to be calculated or +// which is invalid for a particular struct such as [*antlr.BaseRuleContext] +var ATNInvalidAltNumber int + +// ATN represents an “[Augmented Transition Network]”, though general in ANTLR the term +// “Augmented Recursive Transition Network” though there are some descriptions of “[Recursive Transition Network]” +// in existence. +// +// ATNs represent the main networks in the system and are serialized by the code generator and support [ALL(*)]. +// +// [Augmented Transition Network]: https://en.wikipedia.org/wiki/Augmented_transition_network +// [ALL(*)]: https://www.antlr.org/papers/allstar-techreport.pdf +// [Recursive Transition Network]: https://en.wikipedia.org/wiki/Recursive_transition_network +type ATN struct { + // DecisionToState is the decision points for all rules, subrules, optional + // blocks, ()+, ()*, etc. Each subrule/rule is a decision point, and we must track them so we + // can go back later and build DFA predictors for them. This includes + // all the rules, subrules, optional blocks, ()+, ()* etc... + DecisionToState []DecisionState + + // grammarType is the ATN type and is used for deserializing ATNs from strings. + grammarType int + + // lexerActions is referenced by action transitions in the ATN for lexer ATNs. + lexerActions []LexerAction + + // maxTokenType is the maximum value for any symbol recognized by a transition in the ATN. + maxTokenType int + + modeNameToStartState map[string]*TokensStartState + + modeToStartState []*TokensStartState + + // ruleToStartState maps from rule index to starting state number. + ruleToStartState []*RuleStartState + + // ruleToStopState maps from rule index to stop state number. + ruleToStopState []*RuleStopState + + // ruleToTokenType maps the rule index to the resulting token type for lexer + // ATNs. For parser ATNs, it maps the rule index to the generated bypass token + // type if ATNDeserializationOptions.isGenerateRuleBypassTransitions was + // specified, and otherwise is nil. + ruleToTokenType []int + + states []ATNState + + mu sync.Mutex + stateMu sync.RWMutex + edgeMu sync.RWMutex +} + +// NewATN returns a new ATN struct representing the given grammarType and is used +// for runtime deserialization of ATNs from the code generated by the ANTLR tool +func NewATN(grammarType int, maxTokenType int) *ATN { + return &ATN{ + grammarType: grammarType, + maxTokenType: maxTokenType, + modeNameToStartState: make(map[string]*TokensStartState), + } +} + +// NextTokensInContext computes and returns the set of valid tokens that can occur starting +// in state s. If ctx is nil, the set of tokens will not include what can follow +// the rule surrounding s. In other words, the set will be restricted to tokens +// reachable staying within the rule of s. +func (a *ATN) NextTokensInContext(s ATNState, ctx RuleContext) *IntervalSet { + return NewLL1Analyzer(a).Look(s, nil, ctx) +} + +// NextTokensNoContext computes and returns the set of valid tokens that can occur starting +// in state s and staying in same rule. [antlr.Token.EPSILON] is in set if we reach end of +// rule. +func (a *ATN) NextTokensNoContext(s ATNState) *IntervalSet { + a.mu.Lock() + defer a.mu.Unlock() + iset := s.GetNextTokenWithinRule() + if iset == nil { + iset = a.NextTokensInContext(s, nil) + iset.readOnly = true + s.SetNextTokenWithinRule(iset) + } + return iset +} + +// NextTokens computes and returns the set of valid tokens starting in state s, by +// calling either [NextTokensNoContext] (ctx == nil) or [NextTokensInContext] (ctx != nil). +func (a *ATN) NextTokens(s ATNState, ctx RuleContext) *IntervalSet { + if ctx == nil { + return a.NextTokensNoContext(s) + } + + return a.NextTokensInContext(s, ctx) +} + +func (a *ATN) addState(state ATNState) { + if state != nil { + state.SetATN(a) + state.SetStateNumber(len(a.states)) + } + + a.states = append(a.states, state) +} + +func (a *ATN) removeState(state ATNState) { + a.states[state.GetStateNumber()] = nil // Just free the memory; don't shift states in the slice +} + +func (a *ATN) defineDecisionState(s DecisionState) int { + a.DecisionToState = append(a.DecisionToState, s) + s.setDecision(len(a.DecisionToState) - 1) + + return s.getDecision() +} + +func (a *ATN) getDecisionState(decision int) DecisionState { + if len(a.DecisionToState) == 0 { + return nil + } + + return a.DecisionToState[decision] +} + +// getExpectedTokens computes the set of input symbols which could follow ATN +// state number stateNumber in the specified full parse context ctx and returns +// the set of potentially valid input symbols which could follow the specified +// state in the specified context. This method considers the complete parser +// context, but does not evaluate semantic predicates (i.e. all predicates +// encountered during the calculation are assumed true). If a path in the ATN +// exists from the starting state to the RuleStopState of the outermost context +// without Matching any symbols, Token.EOF is added to the returned set. +// +// A nil ctx defaults to ParserRuleContext.EMPTY. +// +// It panics if the ATN does not contain state stateNumber. +func (a *ATN) getExpectedTokens(stateNumber int, ctx RuleContext) *IntervalSet { + if stateNumber < 0 || stateNumber >= len(a.states) { + panic("Invalid state number.") + } + + s := a.states[stateNumber] + following := a.NextTokens(s, nil) + + if !following.contains(TokenEpsilon) { + return following + } + + expected := NewIntervalSet() + + expected.addSet(following) + expected.removeOne(TokenEpsilon) + + for ctx != nil && ctx.GetInvokingState() >= 0 && following.contains(TokenEpsilon) { + invokingState := a.states[ctx.GetInvokingState()] + rt := invokingState.GetTransitions()[0] + + following = a.NextTokens(rt.(*RuleTransition).followState, nil) + expected.addSet(following) + expected.removeOne(TokenEpsilon) + ctx = ctx.GetParent().(RuleContext) + } + + if following.contains(TokenEpsilon) { + expected.addOne(TokenEOF) + } + + return expected +} diff --git a/runtime/Go/antlr/v4/atn_config.go b/runtime/Go/antlr/v4/atn_config.go new file mode 100644 index 0000000000..7619fa172e --- /dev/null +++ b/runtime/Go/antlr/v4/atn_config.go @@ -0,0 +1,303 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" +) + +// ATNConfig is a tuple: (ATN state, predicted alt, syntactic, semantic +// context). The syntactic context is a graph-structured stack node whose +// path(s) to the root is the rule invocation(s) chain used to arrive at the +// state. The semantic context is the tree of semantic predicates encountered +// before reaching an ATN state. +type ATNConfig interface { + Equals(o Collectable[ATNConfig]) bool + Hash() int + + GetState() ATNState + GetAlt() int + GetSemanticContext() SemanticContext + + GetContext() PredictionContext + SetContext(PredictionContext) + + GetReachesIntoOuterContext() int + SetReachesIntoOuterContext(int) + + String() string + + getPrecedenceFilterSuppressed() bool + setPrecedenceFilterSuppressed(bool) +} + +type BaseATNConfig struct { + precedenceFilterSuppressed bool + state ATNState + alt int + context PredictionContext + semanticContext SemanticContext + reachesIntoOuterContext int +} + +func NewBaseATNConfig7(old *BaseATNConfig) ATNConfig { // TODO: Dup + return &BaseATNConfig{ + state: old.state, + alt: old.alt, + context: old.context, + semanticContext: old.semanticContext, + reachesIntoOuterContext: old.reachesIntoOuterContext, + } +} + +func NewBaseATNConfig6(state ATNState, alt int, context PredictionContext) *BaseATNConfig { + return NewBaseATNConfig5(state, alt, context, SemanticContextNone) +} + +func NewBaseATNConfig5(state ATNState, alt int, context PredictionContext, semanticContext SemanticContext) *BaseATNConfig { + if semanticContext == nil { + panic("semanticContext cannot be nil") // TODO: Necessary? + } + + return &BaseATNConfig{state: state, alt: alt, context: context, semanticContext: semanticContext} +} + +func NewBaseATNConfig4(c ATNConfig, state ATNState) *BaseATNConfig { + return NewBaseATNConfig(c, state, c.GetContext(), c.GetSemanticContext()) +} + +func NewBaseATNConfig3(c ATNConfig, state ATNState, semanticContext SemanticContext) *BaseATNConfig { + return NewBaseATNConfig(c, state, c.GetContext(), semanticContext) +} + +func NewBaseATNConfig2(c ATNConfig, semanticContext SemanticContext) *BaseATNConfig { + return NewBaseATNConfig(c, c.GetState(), c.GetContext(), semanticContext) +} + +func NewBaseATNConfig1(c ATNConfig, state ATNState, context PredictionContext) *BaseATNConfig { + return NewBaseATNConfig(c, state, context, c.GetSemanticContext()) +} + +func NewBaseATNConfig(c ATNConfig, state ATNState, context PredictionContext, semanticContext SemanticContext) *BaseATNConfig { + if semanticContext == nil { + panic("semanticContext cannot be nil") + } + + return &BaseATNConfig{ + state: state, + alt: c.GetAlt(), + context: context, + semanticContext: semanticContext, + reachesIntoOuterContext: c.GetReachesIntoOuterContext(), + precedenceFilterSuppressed: c.getPrecedenceFilterSuppressed(), + } +} + +func (b *BaseATNConfig) getPrecedenceFilterSuppressed() bool { + return b.precedenceFilterSuppressed +} + +func (b *BaseATNConfig) setPrecedenceFilterSuppressed(v bool) { + b.precedenceFilterSuppressed = v +} + +func (b *BaseATNConfig) GetState() ATNState { + return b.state +} + +func (b *BaseATNConfig) GetAlt() int { + return b.alt +} + +func (b *BaseATNConfig) SetContext(v PredictionContext) { + b.context = v +} +func (b *BaseATNConfig) GetContext() PredictionContext { + return b.context +} + +func (b *BaseATNConfig) GetSemanticContext() SemanticContext { + return b.semanticContext +} + +func (b *BaseATNConfig) GetReachesIntoOuterContext() int { + return b.reachesIntoOuterContext +} + +func (b *BaseATNConfig) SetReachesIntoOuterContext(v int) { + b.reachesIntoOuterContext = v +} + +// Equals is the default comparison function for an ATNConfig when no specialist implementation is required +// for a collection. +// +// An ATN configuration is equal to another if both have the same state, they +// predict the same alternative, and syntactic/semantic contexts are the same. +func (b *BaseATNConfig) Equals(o Collectable[ATNConfig]) bool { + if b == o { + return true + } else if o == nil { + return false + } + + var other, ok = o.(*BaseATNConfig) + + if !ok { + return false + } + + var equal bool + + if b.context == nil { + equal = other.context == nil + } else { + equal = b.context.Equals(other.context) + } + + var ( + nums = b.state.GetStateNumber() == other.state.GetStateNumber() + alts = b.alt == other.alt + cons = b.semanticContext.Equals(other.semanticContext) + sups = b.precedenceFilterSuppressed == other.precedenceFilterSuppressed + ) + + return nums && alts && cons && sups && equal +} + +// Hash is the default hash function for BaseATNConfig, when no specialist hash function +// is required for a collection +func (b *BaseATNConfig) Hash() int { + var c int + if b.context != nil { + c = b.context.Hash() + } + + h := murmurInit(7) + h = murmurUpdate(h, b.state.GetStateNumber()) + h = murmurUpdate(h, b.alt) + h = murmurUpdate(h, c) + h = murmurUpdate(h, b.semanticContext.Hash()) + return murmurFinish(h, 4) +} + +func (b *BaseATNConfig) String() string { + var s1, s2, s3 string + + if b.context != nil { + s1 = ",[" + fmt.Sprint(b.context) + "]" + } + + if b.semanticContext != SemanticContextNone { + s2 = "," + fmt.Sprint(b.semanticContext) + } + + if b.reachesIntoOuterContext > 0 { + s3 = ",up=" + fmt.Sprint(b.reachesIntoOuterContext) + } + + return fmt.Sprintf("(%v,%v%v%v%v)", b.state, b.alt, s1, s2, s3) +} + +type LexerATNConfig struct { + *BaseATNConfig + lexerActionExecutor *LexerActionExecutor + passedThroughNonGreedyDecision bool +} + +func NewLexerATNConfig6(state ATNState, alt int, context PredictionContext) *LexerATNConfig { + return &LexerATNConfig{BaseATNConfig: NewBaseATNConfig5(state, alt, context, SemanticContextNone)} +} + +func NewLexerATNConfig5(state ATNState, alt int, context PredictionContext, lexerActionExecutor *LexerActionExecutor) *LexerATNConfig { + return &LexerATNConfig{ + BaseATNConfig: NewBaseATNConfig5(state, alt, context, SemanticContextNone), + lexerActionExecutor: lexerActionExecutor, + } +} + +func NewLexerATNConfig4(c *LexerATNConfig, state ATNState) *LexerATNConfig { + return &LexerATNConfig{ + BaseATNConfig: NewBaseATNConfig(c, state, c.GetContext(), c.GetSemanticContext()), + lexerActionExecutor: c.lexerActionExecutor, + passedThroughNonGreedyDecision: checkNonGreedyDecision(c, state), + } +} + +func NewLexerATNConfig3(c *LexerATNConfig, state ATNState, lexerActionExecutor *LexerActionExecutor) *LexerATNConfig { + return &LexerATNConfig{ + BaseATNConfig: NewBaseATNConfig(c, state, c.GetContext(), c.GetSemanticContext()), + lexerActionExecutor: lexerActionExecutor, + passedThroughNonGreedyDecision: checkNonGreedyDecision(c, state), + } +} + +func NewLexerATNConfig2(c *LexerATNConfig, state ATNState, context PredictionContext) *LexerATNConfig { + return &LexerATNConfig{ + BaseATNConfig: NewBaseATNConfig(c, state, context, c.GetSemanticContext()), + lexerActionExecutor: c.lexerActionExecutor, + passedThroughNonGreedyDecision: checkNonGreedyDecision(c, state), + } +} + +func NewLexerATNConfig1(state ATNState, alt int, context PredictionContext) *LexerATNConfig { + return &LexerATNConfig{BaseATNConfig: NewBaseATNConfig5(state, alt, context, SemanticContextNone)} +} + +// Hash is the default hash function for LexerATNConfig objects, it can be used directly or via +// the default comparator [ObjEqComparator]. +func (l *LexerATNConfig) Hash() int { + var f int + if l.passedThroughNonGreedyDecision { + f = 1 + } else { + f = 0 + } + h := murmurInit(7) + h = murmurUpdate(h, l.state.GetStateNumber()) + h = murmurUpdate(h, l.alt) + h = murmurUpdate(h, l.context.Hash()) + h = murmurUpdate(h, l.semanticContext.Hash()) + h = murmurUpdate(h, f) + h = murmurUpdate(h, l.lexerActionExecutor.Hash()) + h = murmurFinish(h, 6) + return h +} + +// Equals is the default comparison function for LexerATNConfig objects, it can be used directly or via +// the default comparator [ObjEqComparator]. +func (l *LexerATNConfig) Equals(other Collectable[ATNConfig]) bool { + if l == other { + return true + } + var othert, ok = other.(*LexerATNConfig) + + if l == other { + return true + } else if !ok { + return false + } else if l.passedThroughNonGreedyDecision != othert.passedThroughNonGreedyDecision { + return false + } + + var b bool + + if l.lexerActionExecutor != nil { + b = !l.lexerActionExecutor.Equals(othert.lexerActionExecutor) + } else { + b = othert.lexerActionExecutor != nil + } + + if b { + return false + } + + return l.BaseATNConfig.Equals(othert.BaseATNConfig) +} + +func checkNonGreedyDecision(source *LexerATNConfig, target ATNState) bool { + var ds, ok = target.(DecisionState) + + return source.passedThroughNonGreedyDecision || (ok && ds.getNonGreedy()) +} diff --git a/runtime/Go/antlr/v4/atn_config_set.go b/runtime/Go/antlr/v4/atn_config_set.go new file mode 100644 index 0000000000..582f5fa4b5 --- /dev/null +++ b/runtime/Go/antlr/v4/atn_config_set.go @@ -0,0 +1,439 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "fmt" + +type ATNConfigSet interface { + Hash() int + Equals(o Collectable[ATNConfig]) bool + Add(ATNConfig, *DoubleDict) bool + AddAll([]ATNConfig) bool + + GetStates() *JStore[ATNState, Comparator[ATNState]] + GetPredicates() []SemanticContext + GetItems() []ATNConfig + + OptimizeConfigs(interpreter *BaseATNSimulator) + + Length() int + IsEmpty() bool + Contains(ATNConfig) bool + ContainsFast(ATNConfig) bool + Clear() + String() string + + HasSemanticContext() bool + SetHasSemanticContext(v bool) + + ReadOnly() bool + SetReadOnly(bool) + + GetConflictingAlts() *BitSet + SetConflictingAlts(*BitSet) + + Alts() *BitSet + + FullContext() bool + + GetUniqueAlt() int + SetUniqueAlt(int) + + GetDipsIntoOuterContext() bool + SetDipsIntoOuterContext(bool) +} + +// BaseATNConfigSet is a specialized set of ATNConfig that tracks information +// about its elements and can combine similar configurations using a +// graph-structured stack. +type BaseATNConfigSet struct { + cachedHash int + + // configLookup is used to determine whether two BaseATNConfigSets are equal. We + // need all configurations with the same (s, i, _, semctx) to be equal. A key + // effectively doubles the number of objects associated with ATNConfigs. All + // keys are hashed by (s, i, _, pi), not including the context. Wiped out when + // read-only because a set becomes a DFA state. + configLookup *JStore[ATNConfig, Comparator[ATNConfig]] + + // configs is the added elements. + configs []ATNConfig + + // TODO: These fields make me pretty uncomfortable, but it is nice to pack up + // info together because it saves recomputation. Can we track conflicts as they + // are added to save scanning configs later? + conflictingAlts *BitSet + + // dipsIntoOuterContext is used by parsers and lexers. In a lexer, it indicates + // we hit a pred while computing a closure operation. Do not make a DFA state + // from the BaseATNConfigSet in this case. TODO: How is this used by parsers? + dipsIntoOuterContext bool + + // fullCtx is whether it is part of a full context LL prediction. Used to + // determine how to merge $. It is a wildcard with SLL, but not for an LL + // context merge. + fullCtx bool + + // Used in parser and lexer. In lexer, it indicates we hit a pred + // while computing a closure operation. Don't make a DFA state from a. + hasSemanticContext bool + + // readOnly is whether it is read-only. Do not + // allow any code to manipulate the set if true because DFA states will point at + // sets and those must not change. It not, protect other fields; conflictingAlts + // in particular, which is assigned after readOnly. + readOnly bool + + // TODO: These fields make me pretty uncomfortable, but it is nice to pack up + // info together because it saves recomputation. Can we track conflicts as they + // are added to save scanning configs later? + uniqueAlt int +} + +func (b *BaseATNConfigSet) Alts() *BitSet { + alts := NewBitSet() + for _, it := range b.configs { + alts.add(it.GetAlt()) + } + return alts +} + +func NewBaseATNConfigSet(fullCtx bool) *BaseATNConfigSet { + return &BaseATNConfigSet{ + cachedHash: -1, + configLookup: NewJStore[ATNConfig, Comparator[ATNConfig]](&ATNConfigComparator[ATNConfig]{}), + fullCtx: fullCtx, + } +} + +// Add merges contexts with existing configs for (s, i, pi, _), where s is the +// ATNConfig.state, i is the ATNConfig.alt, and pi is the +// ATNConfig.semanticContext. We use (s,i,pi) as the key. Updates +// dipsIntoOuterContext and hasSemanticContext when necessary. +func (b *BaseATNConfigSet) Add(config ATNConfig, mergeCache *DoubleDict) bool { + if b.readOnly { + panic("set is read-only") + } + + if config.GetSemanticContext() != SemanticContextNone { + b.hasSemanticContext = true + } + + if config.GetReachesIntoOuterContext() > 0 { + b.dipsIntoOuterContext = true + } + + existing, present := b.configLookup.Put(config) + + // The config was not already in the set + // + if !present { + b.cachedHash = -1 + b.configs = append(b.configs, config) // Track order here + return true + } + + // Merge a previous (s, i, pi, _) with it and save the result + rootIsWildcard := !b.fullCtx + merged := merge(existing.GetContext(), config.GetContext(), rootIsWildcard, mergeCache) + + // No need to check for existing.context because config.context is in the cache, + // since the only way to create new graphs is the "call rule" and here. We cache + // at both places. + existing.SetReachesIntoOuterContext(intMax(existing.GetReachesIntoOuterContext(), config.GetReachesIntoOuterContext())) + + // Preserve the precedence filter suppression during the merge + if config.getPrecedenceFilterSuppressed() { + existing.setPrecedenceFilterSuppressed(true) + } + + // Replace the context because there is no need to do alt mapping + existing.SetContext(merged) + + return true +} + +func (b *BaseATNConfigSet) GetStates() *JStore[ATNState, Comparator[ATNState]] { + + // states uses the standard comparator provided by the ATNState instance + // + states := NewJStore[ATNState, Comparator[ATNState]](&ObjEqComparator[ATNState]{}) + + for i := 0; i < len(b.configs); i++ { + states.Put(b.configs[i].GetState()) + } + + return states +} + +func (b *BaseATNConfigSet) HasSemanticContext() bool { + return b.hasSemanticContext +} + +func (b *BaseATNConfigSet) SetHasSemanticContext(v bool) { + b.hasSemanticContext = v +} + +func (b *BaseATNConfigSet) GetPredicates() []SemanticContext { + preds := make([]SemanticContext, 0) + + for i := 0; i < len(b.configs); i++ { + c := b.configs[i].GetSemanticContext() + + if c != SemanticContextNone { + preds = append(preds, c) + } + } + + return preds +} + +func (b *BaseATNConfigSet) GetItems() []ATNConfig { + return b.configs +} + +func (b *BaseATNConfigSet) OptimizeConfigs(interpreter *BaseATNSimulator) { + if b.readOnly { + panic("set is read-only") + } + + if b.configLookup.Len() == 0 { + return + } + + for i := 0; i < len(b.configs); i++ { + config := b.configs[i] + + config.SetContext(interpreter.getCachedContext(config.GetContext())) + } +} + +func (b *BaseATNConfigSet) AddAll(coll []ATNConfig) bool { + for i := 0; i < len(coll); i++ { + b.Add(coll[i], nil) + } + + return false +} + +// Compare is a hack function just to verify that adding DFAstares to the known +// set works, so long as comparison of ATNConfigSet s works. For that to work, we +// need to make sure that the set of ATNConfigs in two sets are equivalent. We can't +// know the order, so we do this inefficient hack. If this proves the point, then +// we can change the config set to a better structure. +func (b *BaseATNConfigSet) Compare(bs *BaseATNConfigSet) bool { + if len(b.configs) != len(bs.configs) { + return false + } + + for _, c := range b.configs { + found := false + for _, c2 := range bs.configs { + if c.Equals(c2) { + found = true + break + } + } + + if !found { + return false + } + + } + return true +} + +func (b *BaseATNConfigSet) Equals(other Collectable[ATNConfig]) bool { + if b == other { + return true + } else if _, ok := other.(*BaseATNConfigSet); !ok { + return false + } + + other2 := other.(*BaseATNConfigSet) + + return b.configs != nil && + b.fullCtx == other2.fullCtx && + b.uniqueAlt == other2.uniqueAlt && + b.conflictingAlts == other2.conflictingAlts && + b.hasSemanticContext == other2.hasSemanticContext && + b.dipsIntoOuterContext == other2.dipsIntoOuterContext && + b.Compare(other2) +} + +func (b *BaseATNConfigSet) Hash() int { + if b.readOnly { + if b.cachedHash == -1 { + b.cachedHash = b.hashCodeConfigs() + } + + return b.cachedHash + } + + return b.hashCodeConfigs() +} + +func (b *BaseATNConfigSet) hashCodeConfigs() int { + h := 1 + for _, config := range b.configs { + h = 31*h + config.Hash() + } + return h +} + +func (b *BaseATNConfigSet) Length() int { + return len(b.configs) +} + +func (b *BaseATNConfigSet) IsEmpty() bool { + return len(b.configs) == 0 +} + +func (b *BaseATNConfigSet) Contains(item ATNConfig) bool { + if b.configLookup == nil { + panic("not implemented for read-only sets") + } + + return b.configLookup.Contains(item) +} + +func (b *BaseATNConfigSet) ContainsFast(item ATNConfig) bool { + if b.configLookup == nil { + panic("not implemented for read-only sets") + } + + return b.configLookup.Contains(item) // TODO: containsFast is not implemented for Set +} + +func (b *BaseATNConfigSet) Clear() { + if b.readOnly { + panic("set is read-only") + } + + b.configs = make([]ATNConfig, 0) + b.cachedHash = -1 + b.configLookup = NewJStore[ATNConfig, Comparator[ATNConfig]](&BaseATNConfigComparator[ATNConfig]{}) +} + +func (b *BaseATNConfigSet) FullContext() bool { + return b.fullCtx +} + +func (b *BaseATNConfigSet) GetDipsIntoOuterContext() bool { + return b.dipsIntoOuterContext +} + +func (b *BaseATNConfigSet) SetDipsIntoOuterContext(v bool) { + b.dipsIntoOuterContext = v +} + +func (b *BaseATNConfigSet) GetUniqueAlt() int { + return b.uniqueAlt +} + +func (b *BaseATNConfigSet) SetUniqueAlt(v int) { + b.uniqueAlt = v +} + +func (b *BaseATNConfigSet) GetConflictingAlts() *BitSet { + return b.conflictingAlts +} + +func (b *BaseATNConfigSet) SetConflictingAlts(v *BitSet) { + b.conflictingAlts = v +} + +func (b *BaseATNConfigSet) ReadOnly() bool { + return b.readOnly +} + +func (b *BaseATNConfigSet) SetReadOnly(readOnly bool) { + b.readOnly = readOnly + + if readOnly { + b.configLookup = nil // Read only, so no need for the lookup cache + } +} + +func (b *BaseATNConfigSet) String() string { + s := "[" + + for i, c := range b.configs { + s += c.String() + + if i != len(b.configs)-1 { + s += ", " + } + } + + s += "]" + + if b.hasSemanticContext { + s += ",hasSemanticContext=" + fmt.Sprint(b.hasSemanticContext) + } + + if b.uniqueAlt != ATNInvalidAltNumber { + s += ",uniqueAlt=" + fmt.Sprint(b.uniqueAlt) + } + + if b.conflictingAlts != nil { + s += ",conflictingAlts=" + b.conflictingAlts.String() + } + + if b.dipsIntoOuterContext { + s += ",dipsIntoOuterContext" + } + + return s +} + +type OrderedATNConfigSet struct { + *BaseATNConfigSet +} + +func NewOrderedATNConfigSet() *OrderedATNConfigSet { + b := NewBaseATNConfigSet(false) + + // This set uses the standard Hash() and Equals() from ATNConfig + b.configLookup = NewJStore[ATNConfig, Comparator[ATNConfig]](&ObjEqComparator[ATNConfig]{}) + + return &OrderedATNConfigSet{BaseATNConfigSet: b} +} + +func hashATNConfig(i interface{}) int { + o := i.(ATNConfig) + hash := 7 + hash = 31*hash + o.GetState().GetStateNumber() + hash = 31*hash + o.GetAlt() + hash = 31*hash + o.GetSemanticContext().Hash() + return hash +} + +func equalATNConfigs(a, b interface{}) bool { + if a == nil || b == nil { + return false + } + + if a == b { + return true + } + + var ai, ok = a.(ATNConfig) + var bi, ok1 = b.(ATNConfig) + + if !ok || !ok1 { + return false + } + + if ai.GetState().GetStateNumber() != bi.GetState().GetStateNumber() { + return false + } + + if ai.GetAlt() != bi.GetAlt() { + return false + } + + return ai.GetSemanticContext().Equals(bi.GetSemanticContext()) +} diff --git a/runtime/Go/antlr/v4/atn_deserialization_options.go b/runtime/Go/antlr/v4/atn_deserialization_options.go new file mode 100644 index 0000000000..3c975ec7bf --- /dev/null +++ b/runtime/Go/antlr/v4/atn_deserialization_options.go @@ -0,0 +1,61 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "errors" + +var defaultATNDeserializationOptions = ATNDeserializationOptions{true, true, false} + +type ATNDeserializationOptions struct { + readOnly bool + verifyATN bool + generateRuleBypassTransitions bool +} + +func (opts *ATNDeserializationOptions) ReadOnly() bool { + return opts.readOnly +} + +func (opts *ATNDeserializationOptions) SetReadOnly(readOnly bool) { + if opts.readOnly { + panic(errors.New("Cannot mutate read only ATNDeserializationOptions")) + } + opts.readOnly = readOnly +} + +func (opts *ATNDeserializationOptions) VerifyATN() bool { + return opts.verifyATN +} + +func (opts *ATNDeserializationOptions) SetVerifyATN(verifyATN bool) { + if opts.readOnly { + panic(errors.New("Cannot mutate read only ATNDeserializationOptions")) + } + opts.verifyATN = verifyATN +} + +func (opts *ATNDeserializationOptions) GenerateRuleBypassTransitions() bool { + return opts.generateRuleBypassTransitions +} + +func (opts *ATNDeserializationOptions) SetGenerateRuleBypassTransitions(generateRuleBypassTransitions bool) { + if opts.readOnly { + panic(errors.New("Cannot mutate read only ATNDeserializationOptions")) + } + opts.generateRuleBypassTransitions = generateRuleBypassTransitions +} + +func DefaultATNDeserializationOptions() *ATNDeserializationOptions { + return NewATNDeserializationOptions(&defaultATNDeserializationOptions) +} + +func NewATNDeserializationOptions(other *ATNDeserializationOptions) *ATNDeserializationOptions { + o := new(ATNDeserializationOptions) + if other != nil { + *o = *other + o.readOnly = false + } + return o +} diff --git a/runtime/Go/antlr/v4/atn_deserializer.go b/runtime/Go/antlr/v4/atn_deserializer.go new file mode 100644 index 0000000000..3888856b4b --- /dev/null +++ b/runtime/Go/antlr/v4/atn_deserializer.go @@ -0,0 +1,683 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" +) + +const serializedVersion = 4 + +type loopEndStateIntPair struct { + item0 *LoopEndState + item1 int +} + +type blockStartStateIntPair struct { + item0 BlockStartState + item1 int +} + +type ATNDeserializer struct { + options *ATNDeserializationOptions + data []int32 + pos int +} + +func NewATNDeserializer(options *ATNDeserializationOptions) *ATNDeserializer { + if options == nil { + options = &defaultATNDeserializationOptions + } + + return &ATNDeserializer{options: options} +} + +func stringInSlice(a string, list []string) int { + for i, b := range list { + if b == a { + return i + } + } + + return -1 +} + +func (a *ATNDeserializer) Deserialize(data []int32) *ATN { + a.data = data + a.pos = 0 + a.checkVersion() + + atn := a.readATN() + + a.readStates(atn) + a.readRules(atn) + a.readModes(atn) + + sets := a.readSets(atn, nil) + + a.readEdges(atn, sets) + a.readDecisions(atn) + a.readLexerActions(atn) + a.markPrecedenceDecisions(atn) + a.verifyATN(atn) + + if a.options.GenerateRuleBypassTransitions() && atn.grammarType == ATNTypeParser { + a.generateRuleBypassTransitions(atn) + // Re-verify after modification + a.verifyATN(atn) + } + + return atn + +} + +func (a *ATNDeserializer) checkVersion() { + version := a.readInt() + + if version != serializedVersion { + panic("Could not deserialize ATN with version " + strconv.Itoa(version) + " (expected " + strconv.Itoa(serializedVersion) + ").") + } +} + +func (a *ATNDeserializer) readATN() *ATN { + grammarType := a.readInt() + maxTokenType := a.readInt() + + return NewATN(grammarType, maxTokenType) +} + +func (a *ATNDeserializer) readStates(atn *ATN) { + nstates := a.readInt() + + // Allocate worst case size. + loopBackStateNumbers := make([]loopEndStateIntPair, 0, nstates) + endStateNumbers := make([]blockStartStateIntPair, 0, nstates) + + // Preallocate states slice. + atn.states = make([]ATNState, 0, nstates) + + for i := 0; i < nstates; i++ { + stype := a.readInt() + + // Ignore bad types of states + if stype == ATNStateInvalidType { + atn.addState(nil) + continue + } + + ruleIndex := a.readInt() + + s := a.stateFactory(stype, ruleIndex) + + if stype == ATNStateLoopEnd { + loopBackStateNumber := a.readInt() + + loopBackStateNumbers = append(loopBackStateNumbers, loopEndStateIntPair{s.(*LoopEndState), loopBackStateNumber}) + } else if s2, ok := s.(BlockStartState); ok { + endStateNumber := a.readInt() + + endStateNumbers = append(endStateNumbers, blockStartStateIntPair{s2, endStateNumber}) + } + + atn.addState(s) + } + + // Delay the assignment of loop back and end states until we know all the state + // instances have been initialized + for _, pair := range loopBackStateNumbers { + pair.item0.loopBackState = atn.states[pair.item1] + } + + for _, pair := range endStateNumbers { + pair.item0.setEndState(atn.states[pair.item1].(*BlockEndState)) + } + + numNonGreedyStates := a.readInt() + for j := 0; j < numNonGreedyStates; j++ { + stateNumber := a.readInt() + + atn.states[stateNumber].(DecisionState).setNonGreedy(true) + } + + numPrecedenceStates := a.readInt() + for j := 0; j < numPrecedenceStates; j++ { + stateNumber := a.readInt() + + atn.states[stateNumber].(*RuleStartState).isPrecedenceRule = true + } +} + +func (a *ATNDeserializer) readRules(atn *ATN) { + nrules := a.readInt() + + if atn.grammarType == ATNTypeLexer { + atn.ruleToTokenType = make([]int, nrules) + } + + atn.ruleToStartState = make([]*RuleStartState, nrules) + + for i := range atn.ruleToStartState { + s := a.readInt() + startState := atn.states[s].(*RuleStartState) + + atn.ruleToStartState[i] = startState + + if atn.grammarType == ATNTypeLexer { + tokenType := a.readInt() + + atn.ruleToTokenType[i] = tokenType + } + } + + atn.ruleToStopState = make([]*RuleStopState, nrules) + + for _, state := range atn.states { + if s2, ok := state.(*RuleStopState); ok { + atn.ruleToStopState[s2.ruleIndex] = s2 + atn.ruleToStartState[s2.ruleIndex].stopState = s2 + } + } +} + +func (a *ATNDeserializer) readModes(atn *ATN) { + nmodes := a.readInt() + atn.modeToStartState = make([]*TokensStartState, nmodes) + + for i := range atn.modeToStartState { + s := a.readInt() + + atn.modeToStartState[i] = atn.states[s].(*TokensStartState) + } +} + +func (a *ATNDeserializer) readSets(atn *ATN, sets []*IntervalSet) []*IntervalSet { + m := a.readInt() + + // Preallocate the needed capacity. + if cap(sets)-len(sets) < m { + isets := make([]*IntervalSet, len(sets), len(sets)+m) + copy(isets, sets) + sets = isets + } + + for i := 0; i < m; i++ { + iset := NewIntervalSet() + + sets = append(sets, iset) + + n := a.readInt() + containsEOF := a.readInt() + + if containsEOF != 0 { + iset.addOne(-1) + } + + for j := 0; j < n; j++ { + i1 := a.readInt() + i2 := a.readInt() + + iset.addRange(i1, i2) + } + } + + return sets +} + +func (a *ATNDeserializer) readEdges(atn *ATN, sets []*IntervalSet) { + nedges := a.readInt() + + for i := 0; i < nedges; i++ { + var ( + src = a.readInt() + trg = a.readInt() + ttype = a.readInt() + arg1 = a.readInt() + arg2 = a.readInt() + arg3 = a.readInt() + trans = a.edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets) + srcState = atn.states[src] + ) + + srcState.AddTransition(trans, -1) + } + + // Edges for rule stop states can be derived, so they are not serialized + for _, state := range atn.states { + for _, t := range state.GetTransitions() { + var rt, ok = t.(*RuleTransition) + + if !ok { + continue + } + + outermostPrecedenceReturn := -1 + + if atn.ruleToStartState[rt.getTarget().GetRuleIndex()].isPrecedenceRule { + if rt.precedence == 0 { + outermostPrecedenceReturn = rt.getTarget().GetRuleIndex() + } + } + + trans := NewEpsilonTransition(rt.followState, outermostPrecedenceReturn) + + atn.ruleToStopState[rt.getTarget().GetRuleIndex()].AddTransition(trans, -1) + } + } + + for _, state := range atn.states { + if s2, ok := state.(BlockStartState); ok { + // We need to know the end state to set its start state + if s2.getEndState() == nil { + panic("IllegalState") + } + + // Block end states can only be associated to a single block start state + if s2.getEndState().startState != nil { + panic("IllegalState") + } + + s2.getEndState().startState = state + } + + if s2, ok := state.(*PlusLoopbackState); ok { + for _, t := range s2.GetTransitions() { + if t2, ok := t.getTarget().(*PlusBlockStartState); ok { + t2.loopBackState = state + } + } + } else if s2, ok := state.(*StarLoopbackState); ok { + for _, t := range s2.GetTransitions() { + if t2, ok := t.getTarget().(*StarLoopEntryState); ok { + t2.loopBackState = state + } + } + } + } +} + +func (a *ATNDeserializer) readDecisions(atn *ATN) { + ndecisions := a.readInt() + + for i := 0; i < ndecisions; i++ { + s := a.readInt() + decState := atn.states[s].(DecisionState) + + atn.DecisionToState = append(atn.DecisionToState, decState) + decState.setDecision(i) + } +} + +func (a *ATNDeserializer) readLexerActions(atn *ATN) { + if atn.grammarType == ATNTypeLexer { + count := a.readInt() + + atn.lexerActions = make([]LexerAction, count) + + for i := range atn.lexerActions { + actionType := a.readInt() + data1 := a.readInt() + data2 := a.readInt() + atn.lexerActions[i] = a.lexerActionFactory(actionType, data1, data2) + } + } +} + +func (a *ATNDeserializer) generateRuleBypassTransitions(atn *ATN) { + count := len(atn.ruleToStartState) + + for i := 0; i < count; i++ { + atn.ruleToTokenType[i] = atn.maxTokenType + i + 1 + } + + for i := 0; i < count; i++ { + a.generateRuleBypassTransition(atn, i) + } +} + +func (a *ATNDeserializer) generateRuleBypassTransition(atn *ATN, idx int) { + bypassStart := NewBasicBlockStartState() + + bypassStart.ruleIndex = idx + atn.addState(bypassStart) + + bypassStop := NewBlockEndState() + + bypassStop.ruleIndex = idx + atn.addState(bypassStop) + + bypassStart.endState = bypassStop + + atn.defineDecisionState(bypassStart.BaseDecisionState) + + bypassStop.startState = bypassStart + + var excludeTransition Transition + var endState ATNState + + if atn.ruleToStartState[idx].isPrecedenceRule { + // Wrap from the beginning of the rule to the StarLoopEntryState + endState = nil + + for i := 0; i < len(atn.states); i++ { + state := atn.states[i] + + if a.stateIsEndStateFor(state, idx) != nil { + endState = state + excludeTransition = state.(*StarLoopEntryState).loopBackState.GetTransitions()[0] + + break + } + } + + if excludeTransition == nil { + panic("Couldn't identify final state of the precedence rule prefix section.") + } + } else { + endState = atn.ruleToStopState[idx] + } + + // All non-excluded transitions that currently target end state need to target + // blockEnd instead + for i := 0; i < len(atn.states); i++ { + state := atn.states[i] + + for j := 0; j < len(state.GetTransitions()); j++ { + transition := state.GetTransitions()[j] + + if transition == excludeTransition { + continue + } + + if transition.getTarget() == endState { + transition.setTarget(bypassStop) + } + } + } + + // All transitions leaving the rule start state need to leave blockStart instead + ruleToStartState := atn.ruleToStartState[idx] + count := len(ruleToStartState.GetTransitions()) + + for count > 0 { + bypassStart.AddTransition(ruleToStartState.GetTransitions()[count-1], -1) + ruleToStartState.SetTransitions([]Transition{ruleToStartState.GetTransitions()[len(ruleToStartState.GetTransitions())-1]}) + } + + // Link the new states + atn.ruleToStartState[idx].AddTransition(NewEpsilonTransition(bypassStart, -1), -1) + bypassStop.AddTransition(NewEpsilonTransition(endState, -1), -1) + + MatchState := NewBasicState() + + atn.addState(MatchState) + MatchState.AddTransition(NewAtomTransition(bypassStop, atn.ruleToTokenType[idx]), -1) + bypassStart.AddTransition(NewEpsilonTransition(MatchState, -1), -1) +} + +func (a *ATNDeserializer) stateIsEndStateFor(state ATNState, idx int) ATNState { + if state.GetRuleIndex() != idx { + return nil + } + + if _, ok := state.(*StarLoopEntryState); !ok { + return nil + } + + maybeLoopEndState := state.GetTransitions()[len(state.GetTransitions())-1].getTarget() + + if _, ok := maybeLoopEndState.(*LoopEndState); !ok { + return nil + } + + var _, ok = maybeLoopEndState.GetTransitions()[0].getTarget().(*RuleStopState) + + if maybeLoopEndState.(*LoopEndState).epsilonOnlyTransitions && ok { + return state + } + + return nil +} + +// markPrecedenceDecisions analyzes the StarLoopEntryState states in the +// specified ATN to set the StarLoopEntryState.precedenceRuleDecision field to +// the correct value. +func (a *ATNDeserializer) markPrecedenceDecisions(atn *ATN) { + for _, state := range atn.states { + if _, ok := state.(*StarLoopEntryState); !ok { + continue + } + + // We analyze the ATN to determine if a ATN decision state is the + // decision for the closure block that determines whether a + // precedence rule should continue or complete. + if atn.ruleToStartState[state.GetRuleIndex()].isPrecedenceRule { + maybeLoopEndState := state.GetTransitions()[len(state.GetTransitions())-1].getTarget() + + if s3, ok := maybeLoopEndState.(*LoopEndState); ok { + var _, ok2 = maybeLoopEndState.GetTransitions()[0].getTarget().(*RuleStopState) + + if s3.epsilonOnlyTransitions && ok2 { + state.(*StarLoopEntryState).precedenceRuleDecision = true + } + } + } + } +} + +func (a *ATNDeserializer) verifyATN(atn *ATN) { + if !a.options.VerifyATN() { + return + } + + // Verify assumptions + for _, state := range atn.states { + if state == nil { + continue + } + + a.checkCondition(state.GetEpsilonOnlyTransitions() || len(state.GetTransitions()) <= 1, "") + + switch s2 := state.(type) { + case *PlusBlockStartState: + a.checkCondition(s2.loopBackState != nil, "") + + case *StarLoopEntryState: + a.checkCondition(s2.loopBackState != nil, "") + a.checkCondition(len(s2.GetTransitions()) == 2, "") + + switch s2.transitions[0].getTarget().(type) { + case *StarBlockStartState: + _, ok := s2.transitions[1].getTarget().(*LoopEndState) + + a.checkCondition(ok, "") + a.checkCondition(!s2.nonGreedy, "") + + case *LoopEndState: + var _, ok = s2.transitions[1].getTarget().(*StarBlockStartState) + + a.checkCondition(ok, "") + a.checkCondition(s2.nonGreedy, "") + + default: + panic("IllegalState") + } + + case *StarLoopbackState: + a.checkCondition(len(state.GetTransitions()) == 1, "") + + var _, ok = state.GetTransitions()[0].getTarget().(*StarLoopEntryState) + + a.checkCondition(ok, "") + + case *LoopEndState: + a.checkCondition(s2.loopBackState != nil, "") + + case *RuleStartState: + a.checkCondition(s2.stopState != nil, "") + + case BlockStartState: + a.checkCondition(s2.getEndState() != nil, "") + + case *BlockEndState: + a.checkCondition(s2.startState != nil, "") + + case DecisionState: + a.checkCondition(len(s2.GetTransitions()) <= 1 || s2.getDecision() >= 0, "") + + default: + var _, ok = s2.(*RuleStopState) + + a.checkCondition(len(s2.GetTransitions()) <= 1 || ok, "") + } + } +} + +func (a *ATNDeserializer) checkCondition(condition bool, message string) { + if !condition { + if message == "" { + message = "IllegalState" + } + + panic(message) + } +} + +func (a *ATNDeserializer) readInt() int { + v := a.data[a.pos] + + a.pos++ + + return int(v) // data is 32 bits but int is at least that big +} + +func (a *ATNDeserializer) edgeFactory(atn *ATN, typeIndex, src, trg, arg1, arg2, arg3 int, sets []*IntervalSet) Transition { + target := atn.states[trg] + + switch typeIndex { + case TransitionEPSILON: + return NewEpsilonTransition(target, -1) + + case TransitionRANGE: + if arg3 != 0 { + return NewRangeTransition(target, TokenEOF, arg2) + } + + return NewRangeTransition(target, arg1, arg2) + + case TransitionRULE: + return NewRuleTransition(atn.states[arg1], arg2, arg3, target) + + case TransitionPREDICATE: + return NewPredicateTransition(target, arg1, arg2, arg3 != 0) + + case TransitionPRECEDENCE: + return NewPrecedencePredicateTransition(target, arg1) + + case TransitionATOM: + if arg3 != 0 { + return NewAtomTransition(target, TokenEOF) + } + + return NewAtomTransition(target, arg1) + + case TransitionACTION: + return NewActionTransition(target, arg1, arg2, arg3 != 0) + + case TransitionSET: + return NewSetTransition(target, sets[arg1]) + + case TransitionNOTSET: + return NewNotSetTransition(target, sets[arg1]) + + case TransitionWILDCARD: + return NewWildcardTransition(target) + } + + panic("The specified transition type is not valid.") +} + +func (a *ATNDeserializer) stateFactory(typeIndex, ruleIndex int) ATNState { + var s ATNState + + switch typeIndex { + case ATNStateInvalidType: + return nil + + case ATNStateBasic: + s = NewBasicState() + + case ATNStateRuleStart: + s = NewRuleStartState() + + case ATNStateBlockStart: + s = NewBasicBlockStartState() + + case ATNStatePlusBlockStart: + s = NewPlusBlockStartState() + + case ATNStateStarBlockStart: + s = NewStarBlockStartState() + + case ATNStateTokenStart: + s = NewTokensStartState() + + case ATNStateRuleStop: + s = NewRuleStopState() + + case ATNStateBlockEnd: + s = NewBlockEndState() + + case ATNStateStarLoopBack: + s = NewStarLoopbackState() + + case ATNStateStarLoopEntry: + s = NewStarLoopEntryState() + + case ATNStatePlusLoopBack: + s = NewPlusLoopbackState() + + case ATNStateLoopEnd: + s = NewLoopEndState() + + default: + panic(fmt.Sprintf("state type %d is invalid", typeIndex)) + } + + s.SetRuleIndex(ruleIndex) + + return s +} + +func (a *ATNDeserializer) lexerActionFactory(typeIndex, data1, data2 int) LexerAction { + switch typeIndex { + case LexerActionTypeChannel: + return NewLexerChannelAction(data1) + + case LexerActionTypeCustom: + return NewLexerCustomAction(data1, data2) + + case LexerActionTypeMode: + return NewLexerModeAction(data1) + + case LexerActionTypeMore: + return LexerMoreActionINSTANCE + + case LexerActionTypePopMode: + return LexerPopModeActionINSTANCE + + case LexerActionTypePushMode: + return NewLexerPushModeAction(data1) + + case LexerActionTypeSkip: + return LexerSkipActionINSTANCE + + case LexerActionTypeType: + return NewLexerTypeAction(data1) + + default: + panic(fmt.Sprintf("lexer action %d is invalid", typeIndex)) + } +} diff --git a/runtime/Go/antlr/v4/atn_simulator.go b/runtime/Go/antlr/v4/atn_simulator.go new file mode 100644 index 0000000000..41529115fa --- /dev/null +++ b/runtime/Go/antlr/v4/atn_simulator.go @@ -0,0 +1,50 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +var ATNSimulatorError = NewDFAState(0x7FFFFFFF, NewBaseATNConfigSet(false)) + +type IATNSimulator interface { + SharedContextCache() *PredictionContextCache + ATN() *ATN + DecisionToDFA() []*DFA +} + +type BaseATNSimulator struct { + atn *ATN + sharedContextCache *PredictionContextCache + decisionToDFA []*DFA +} + +func NewBaseATNSimulator(atn *ATN, sharedContextCache *PredictionContextCache) *BaseATNSimulator { + b := new(BaseATNSimulator) + + b.atn = atn + b.sharedContextCache = sharedContextCache + + return b +} + +func (b *BaseATNSimulator) getCachedContext(context PredictionContext) PredictionContext { + if b.sharedContextCache == nil { + return context + } + + visited := make(map[PredictionContext]PredictionContext) + + return getCachedBasePredictionContext(context, b.sharedContextCache, visited) +} + +func (b *BaseATNSimulator) SharedContextCache() *PredictionContextCache { + return b.sharedContextCache +} + +func (b *BaseATNSimulator) ATN() *ATN { + return b.atn +} + +func (b *BaseATNSimulator) DecisionToDFA() []*DFA { + return b.decisionToDFA +} diff --git a/runtime/Go/antlr/v4/atn_state.go b/runtime/Go/antlr/v4/atn_state.go new file mode 100644 index 0000000000..1f2a56bc31 --- /dev/null +++ b/runtime/Go/antlr/v4/atn_state.go @@ -0,0 +1,393 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "strconv" + +// Constants for serialization. +const ( + ATNStateInvalidType = 0 + ATNStateBasic = 1 + ATNStateRuleStart = 2 + ATNStateBlockStart = 3 + ATNStatePlusBlockStart = 4 + ATNStateStarBlockStart = 5 + ATNStateTokenStart = 6 + ATNStateRuleStop = 7 + ATNStateBlockEnd = 8 + ATNStateStarLoopBack = 9 + ATNStateStarLoopEntry = 10 + ATNStatePlusLoopBack = 11 + ATNStateLoopEnd = 12 + + ATNStateInvalidStateNumber = -1 +) + +var ATNStateInitialNumTransitions = 4 + +type ATNState interface { + GetEpsilonOnlyTransitions() bool + + GetRuleIndex() int + SetRuleIndex(int) + + GetNextTokenWithinRule() *IntervalSet + SetNextTokenWithinRule(*IntervalSet) + + GetATN() *ATN + SetATN(*ATN) + + GetStateType() int + + GetStateNumber() int + SetStateNumber(int) + + GetTransitions() []Transition + SetTransitions([]Transition) + AddTransition(Transition, int) + + String() string + Hash() int + Equals(Collectable[ATNState]) bool +} + +type BaseATNState struct { + // NextTokenWithinRule caches lookahead during parsing. Not used during construction. + NextTokenWithinRule *IntervalSet + + // atn is the current ATN. + atn *ATN + + epsilonOnlyTransitions bool + + // ruleIndex tracks the Rule index because there are no Rule objects at runtime. + ruleIndex int + + stateNumber int + + stateType int + + // Track the transitions emanating from this ATN state. + transitions []Transition +} + +func NewBaseATNState() *BaseATNState { + return &BaseATNState{stateNumber: ATNStateInvalidStateNumber, stateType: ATNStateInvalidType} +} + +func (as *BaseATNState) GetRuleIndex() int { + return as.ruleIndex +} + +func (as *BaseATNState) SetRuleIndex(v int) { + as.ruleIndex = v +} +func (as *BaseATNState) GetEpsilonOnlyTransitions() bool { + return as.epsilonOnlyTransitions +} + +func (as *BaseATNState) GetATN() *ATN { + return as.atn +} + +func (as *BaseATNState) SetATN(atn *ATN) { + as.atn = atn +} + +func (as *BaseATNState) GetTransitions() []Transition { + return as.transitions +} + +func (as *BaseATNState) SetTransitions(t []Transition) { + as.transitions = t +} + +func (as *BaseATNState) GetStateType() int { + return as.stateType +} + +func (as *BaseATNState) GetStateNumber() int { + return as.stateNumber +} + +func (as *BaseATNState) SetStateNumber(stateNumber int) { + as.stateNumber = stateNumber +} + +func (as *BaseATNState) GetNextTokenWithinRule() *IntervalSet { + return as.NextTokenWithinRule +} + +func (as *BaseATNState) SetNextTokenWithinRule(v *IntervalSet) { + as.NextTokenWithinRule = v +} + +func (as *BaseATNState) Hash() int { + return as.stateNumber +} + +func (as *BaseATNState) String() string { + return strconv.Itoa(as.stateNumber) +} + +func (as *BaseATNState) Equals(other Collectable[ATNState]) bool { + if ot, ok := other.(ATNState); ok { + return as.stateNumber == ot.GetStateNumber() + } + + return false +} + +func (as *BaseATNState) isNonGreedyExitState() bool { + return false +} + +func (as *BaseATNState) AddTransition(trans Transition, index int) { + if len(as.transitions) == 0 { + as.epsilonOnlyTransitions = trans.getIsEpsilon() + } else if as.epsilonOnlyTransitions != trans.getIsEpsilon() { + as.epsilonOnlyTransitions = false + } + + if index == -1 { + as.transitions = append(as.transitions, trans) + } else { + as.transitions = append(as.transitions[:index], append([]Transition{trans}, as.transitions[index:]...)...) + // TODO: as.transitions.splice(index, 1, trans) + } +} + +type BasicState struct { + *BaseATNState +} + +func NewBasicState() *BasicState { + b := NewBaseATNState() + + b.stateType = ATNStateBasic + + return &BasicState{BaseATNState: b} +} + +type DecisionState interface { + ATNState + + getDecision() int + setDecision(int) + + getNonGreedy() bool + setNonGreedy(bool) +} + +type BaseDecisionState struct { + *BaseATNState + decision int + nonGreedy bool +} + +func NewBaseDecisionState() *BaseDecisionState { + return &BaseDecisionState{BaseATNState: NewBaseATNState(), decision: -1} +} + +func (s *BaseDecisionState) getDecision() int { + return s.decision +} + +func (s *BaseDecisionState) setDecision(b int) { + s.decision = b +} + +func (s *BaseDecisionState) getNonGreedy() bool { + return s.nonGreedy +} + +func (s *BaseDecisionState) setNonGreedy(b bool) { + s.nonGreedy = b +} + +type BlockStartState interface { + DecisionState + + getEndState() *BlockEndState + setEndState(*BlockEndState) +} + +// BaseBlockStartState is the start of a regular (...) block. +type BaseBlockStartState struct { + *BaseDecisionState + endState *BlockEndState +} + +func NewBlockStartState() *BaseBlockStartState { + return &BaseBlockStartState{BaseDecisionState: NewBaseDecisionState()} +} + +func (s *BaseBlockStartState) getEndState() *BlockEndState { + return s.endState +} + +func (s *BaseBlockStartState) setEndState(b *BlockEndState) { + s.endState = b +} + +type BasicBlockStartState struct { + *BaseBlockStartState +} + +func NewBasicBlockStartState() *BasicBlockStartState { + b := NewBlockStartState() + + b.stateType = ATNStateBlockStart + + return &BasicBlockStartState{BaseBlockStartState: b} +} + +var _ BlockStartState = &BasicBlockStartState{} + +// BlockEndState is a terminal node of a simple (a|b|c) block. +type BlockEndState struct { + *BaseATNState + startState ATNState +} + +func NewBlockEndState() *BlockEndState { + b := NewBaseATNState() + + b.stateType = ATNStateBlockEnd + + return &BlockEndState{BaseATNState: b} +} + +// RuleStopState is the last node in the ATN for a rule, unless that rule is the +// start symbol. In that case, there is one transition to EOF. Later, we might +// encode references to all calls to this rule to compute FOLLOW sets for error +// handling. +type RuleStopState struct { + *BaseATNState +} + +func NewRuleStopState() *RuleStopState { + b := NewBaseATNState() + + b.stateType = ATNStateRuleStop + + return &RuleStopState{BaseATNState: b} +} + +type RuleStartState struct { + *BaseATNState + stopState ATNState + isPrecedenceRule bool +} + +func NewRuleStartState() *RuleStartState { + b := NewBaseATNState() + + b.stateType = ATNStateRuleStart + + return &RuleStartState{BaseATNState: b} +} + +// PlusLoopbackState is a decision state for A+ and (A|B)+. It has two +// transitions: one to the loop back to start of the block, and one to exit. +type PlusLoopbackState struct { + *BaseDecisionState +} + +func NewPlusLoopbackState() *PlusLoopbackState { + b := NewBaseDecisionState() + + b.stateType = ATNStatePlusLoopBack + + return &PlusLoopbackState{BaseDecisionState: b} +} + +// PlusBlockStartState is the start of a (A|B|...)+ loop. Technically it is a +// decision state; we don't use it for code generation. Somebody might need it, +// it is included for completeness. In reality, PlusLoopbackState is the real +// decision-making node for A+. +type PlusBlockStartState struct { + *BaseBlockStartState + loopBackState ATNState +} + +func NewPlusBlockStartState() *PlusBlockStartState { + b := NewBlockStartState() + + b.stateType = ATNStatePlusBlockStart + + return &PlusBlockStartState{BaseBlockStartState: b} +} + +var _ BlockStartState = &PlusBlockStartState{} + +// StarBlockStartState is the block that begins a closure loop. +type StarBlockStartState struct { + *BaseBlockStartState +} + +func NewStarBlockStartState() *StarBlockStartState { + b := NewBlockStartState() + + b.stateType = ATNStateStarBlockStart + + return &StarBlockStartState{BaseBlockStartState: b} +} + +var _ BlockStartState = &StarBlockStartState{} + +type StarLoopbackState struct { + *BaseATNState +} + +func NewStarLoopbackState() *StarLoopbackState { + b := NewBaseATNState() + + b.stateType = ATNStateStarLoopBack + + return &StarLoopbackState{BaseATNState: b} +} + +type StarLoopEntryState struct { + *BaseDecisionState + loopBackState ATNState + precedenceRuleDecision bool +} + +func NewStarLoopEntryState() *StarLoopEntryState { + b := NewBaseDecisionState() + + b.stateType = ATNStateStarLoopEntry + + // False precedenceRuleDecision indicates whether s state can benefit from a precedence DFA during SLL decision making. + return &StarLoopEntryState{BaseDecisionState: b} +} + +// LoopEndState marks the end of a * or + loop. +type LoopEndState struct { + *BaseATNState + loopBackState ATNState +} + +func NewLoopEndState() *LoopEndState { + b := NewBaseATNState() + + b.stateType = ATNStateLoopEnd + + return &LoopEndState{BaseATNState: b} +} + +// TokensStartState is the Tokens rule start state linking to each lexer rule start state. +type TokensStartState struct { + *BaseDecisionState +} + +func NewTokensStartState() *TokensStartState { + b := NewBaseDecisionState() + + b.stateType = ATNStateTokenStart + + return &TokensStartState{BaseDecisionState: b} +} diff --git a/runtime/Go/antlr/v4/atn_type.go b/runtime/Go/antlr/v4/atn_type.go new file mode 100644 index 0000000000..3a515a145f --- /dev/null +++ b/runtime/Go/antlr/v4/atn_type.go @@ -0,0 +1,11 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +// Represent the type of recognizer an ATN applies to. +const ( + ATNTypeLexer = 0 + ATNTypeParser = 1 +) diff --git a/runtime/Go/antlr/v4/atnconfigset_test.go b/runtime/Go/antlr/v4/atnconfigset_test.go new file mode 100644 index 0000000000..3f1e9cc6cb --- /dev/null +++ b/runtime/Go/antlr/v4/atnconfigset_test.go @@ -0,0 +1,73 @@ +package antlr + +import ( + "testing" +) + +// Test for Issue # 3319 +// To run, "cd antlr4/runtime/Go/antlr/", then "go test". +// In the old runtime code, the test would crash because it would try +// to compare a *LexerActionExecutor with nil, causing a nil pointer dereference. +// It only happens if there were different states that had equal stateNumber mod 16, +// and you created that ATNConfig with a nil LexerActionExecutor. (That's why this +// code has a hardwired constant of 16. + +func TestCompare(t *testing.T) { + var set = NewOrderedATNConfigSet() + var s0 = NewBaseATNState() + var s1 = NewBaseATNState() + var s2 = NewBaseATNState() + var s3 = NewBaseATNState() + var s16 = NewBaseATNState() + s16.SetStateNumber(16) + var s17 = NewBaseATNState() + s17.SetStateNumber(17) + var s18 = NewBaseATNState() + s18.SetStateNumber(18) + var s19 = NewBaseATNState() + s19.SetStateNumber(19) + var la0 = NewBaseLexerAction(1) + var la1 = NewBaseLexerAction(2) + var laa = make([]LexerAction, 2) + laa[0] = la0 + laa[1] = la1 + var ae = NewLexerActionExecutor(laa) + set.Add(NewLexerATNConfig5(s0, 0, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s0, 1, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s0, 2, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s1, 0, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s1, 1, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s1, 2, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s2, 0, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s2, 1, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s2, 2, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s3, 0, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s3, 1, BasePredictionContextEMPTY, ae), nil) + set.Add(NewLexerATNConfig5(s3, 2, BasePredictionContextEMPTY, ae), nil) + + set.Add(NewLexerATNConfig5(s0, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s0, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s0, 2, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s1, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s1, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s1, 2, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s2, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s2, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s2, 2, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s3, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s3, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s3, 2, BasePredictionContextEMPTY, nil), nil) + + set.Add(NewLexerATNConfig5(s16, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s16, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s16, 2, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s17, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s17, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s17, 2, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s18, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s18, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s18, 2, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s19, 0, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s19, 1, BasePredictionContextEMPTY, nil), nil) + set.Add(NewLexerATNConfig5(s19, 2, BasePredictionContextEMPTY, nil), nil) +} diff --git a/runtime/Go/antlr/v4/char_stream.go b/runtime/Go/antlr/v4/char_stream.go new file mode 100644 index 0000000000..c33f0adb5e --- /dev/null +++ b/runtime/Go/antlr/v4/char_stream.go @@ -0,0 +1,12 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type CharStream interface { + IntStream + GetText(int, int) string + GetTextFromTokens(start, end Token) string + GetTextFromInterval(*Interval) string +} diff --git a/runtime/Go/antlr/v4/common_token_factory.go b/runtime/Go/antlr/v4/common_token_factory.go new file mode 100644 index 0000000000..1bb0314ea0 --- /dev/null +++ b/runtime/Go/antlr/v4/common_token_factory.go @@ -0,0 +1,56 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +// TokenFactory creates CommonToken objects. +type TokenFactory interface { + Create(source *TokenSourceCharStreamPair, ttype int, text string, channel, start, stop, line, column int) Token +} + +// CommonTokenFactory is the default TokenFactory implementation. +type CommonTokenFactory struct { + // copyText indicates whether CommonToken.setText should be called after + // constructing tokens to explicitly set the text. This is useful for cases + // where the input stream might not be able to provide arbitrary substrings of + // text from the input after the lexer creates a token (e.g. the + // implementation of CharStream.GetText in UnbufferedCharStream panics an + // UnsupportedOperationException). Explicitly setting the token text allows + // Token.GetText to be called at any time regardless of the input stream + // implementation. + // + // The default value is false to avoid the performance and memory overhead of + // copying text for every token unless explicitly requested. + copyText bool +} + +func NewCommonTokenFactory(copyText bool) *CommonTokenFactory { + return &CommonTokenFactory{copyText: copyText} +} + +// CommonTokenFactoryDEFAULT is the default CommonTokenFactory. It does not +// explicitly copy token text when constructing tokens. +var CommonTokenFactoryDEFAULT = NewCommonTokenFactory(false) + +func (c *CommonTokenFactory) Create(source *TokenSourceCharStreamPair, ttype int, text string, channel, start, stop, line, column int) Token { + t := NewCommonToken(source, ttype, channel, start, stop) + + t.line = line + t.column = column + + if text != "" { + t.SetText(text) + } else if c.copyText && source.charStream != nil { + t.SetText(source.charStream.GetTextFromInterval(NewInterval(start, stop))) + } + + return t +} + +func (c *CommonTokenFactory) createThin(ttype int, text string) Token { + t := NewCommonToken(nil, ttype, TokenDefaultChannel, -1, -1) + t.SetText(text) + + return t +} diff --git a/runtime/Go/antlr/v4/common_token_stream.go b/runtime/Go/antlr/v4/common_token_stream.go new file mode 100644 index 0000000000..c6c9485a20 --- /dev/null +++ b/runtime/Go/antlr/v4/common_token_stream.go @@ -0,0 +1,449 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "strconv" +) + +// CommonTokenStream is an implementation of TokenStream that loads tokens from +// a TokenSource on-demand and places the tokens in a buffer to provide access +// to any previous token by index. This token stream ignores the value of +// Token.getChannel. If your parser requires the token stream filter tokens to +// only those on a particular channel, such as Token.DEFAULT_CHANNEL or +// Token.HIDDEN_CHANNEL, use a filtering token stream such a CommonTokenStream. +type CommonTokenStream struct { + channel int + + // fetchedEOF indicates whether the Token.EOF token has been fetched from + // tokenSource and added to tokens. This field improves performance for the + // following cases: + // + // consume: The lookahead check in consume to preven consuming the EOF symbol is + // optimized by checking the values of fetchedEOF and p instead of calling LA. + // + // fetch: The check to prevent adding multiple EOF symbols into tokens is + // trivial with bt field. + fetchedEOF bool + + // index indexs into tokens of the current token (next token to consume). + // tokens[p] should be LT(1). It is set to -1 when the stream is first + // constructed or when SetTokenSource is called, indicating that the first token + // has not yet been fetched from the token source. For additional information, + // see the documentation of IntStream for a description of initializing methods. + index int + + // tokenSource is the TokenSource from which tokens for the bt stream are + // fetched. + tokenSource TokenSource + + // tokens is all tokens fetched from the token source. The list is considered a + // complete view of the input once fetchedEOF is set to true. + tokens []Token +} + +func NewCommonTokenStream(lexer Lexer, channel int) *CommonTokenStream { + return &CommonTokenStream{ + channel: channel, + index: -1, + tokenSource: lexer, + tokens: make([]Token, 0), + } +} + +func (c *CommonTokenStream) GetAllTokens() []Token { + return c.tokens +} + +func (c *CommonTokenStream) Mark() int { + return 0 +} + +func (c *CommonTokenStream) Release(marker int) {} + +func (c *CommonTokenStream) reset() { + c.Seek(0) +} + +func (c *CommonTokenStream) Seek(index int) { + c.lazyInit() + c.index = c.adjustSeekIndex(index) +} + +func (c *CommonTokenStream) Get(index int) Token { + c.lazyInit() + + return c.tokens[index] +} + +func (c *CommonTokenStream) Consume() { + SkipEOFCheck := false + + if c.index >= 0 { + if c.fetchedEOF { + // The last token in tokens is EOF. Skip the check if p indexes any fetched. + // token except the last. + SkipEOFCheck = c.index < len(c.tokens)-1 + } else { + // No EOF token in tokens. Skip the check if p indexes a fetched token. + SkipEOFCheck = c.index < len(c.tokens) + } + } else { + // Not yet initialized + SkipEOFCheck = false + } + + if !SkipEOFCheck && c.LA(1) == TokenEOF { + panic("cannot consume EOF") + } + + if c.Sync(c.index + 1) { + c.index = c.adjustSeekIndex(c.index + 1) + } +} + +// Sync makes sure index i in tokens has a token and returns true if a token is +// located at index i and otherwise false. +func (c *CommonTokenStream) Sync(i int) bool { + n := i - len(c.tokens) + 1 // TODO: How many more elements do we need? + + if n > 0 { + fetched := c.fetch(n) + return fetched >= n + } + + return true +} + +// fetch adds n elements to buffer and returns the actual number of elements +// added to the buffer. +func (c *CommonTokenStream) fetch(n int) int { + if c.fetchedEOF { + return 0 + } + + for i := 0; i < n; i++ { + t := c.tokenSource.NextToken() + + t.SetTokenIndex(len(c.tokens)) + c.tokens = append(c.tokens, t) + + if t.GetTokenType() == TokenEOF { + c.fetchedEOF = true + + return i + 1 + } + } + + return n +} + +// GetTokens gets all tokens from start to stop inclusive. +func (c *CommonTokenStream) GetTokens(start int, stop int, types *IntervalSet) []Token { + if start < 0 || stop < 0 { + return nil + } + + c.lazyInit() + + subset := make([]Token, 0) + + if stop >= len(c.tokens) { + stop = len(c.tokens) - 1 + } + + for i := start; i < stop; i++ { + t := c.tokens[i] + + if t.GetTokenType() == TokenEOF { + break + } + + if types == nil || types.contains(t.GetTokenType()) { + subset = append(subset, t) + } + } + + return subset +} + +func (c *CommonTokenStream) LA(i int) int { + return c.LT(i).GetTokenType() +} + +func (c *CommonTokenStream) lazyInit() { + if c.index == -1 { + c.setup() + } +} + +func (c *CommonTokenStream) setup() { + c.Sync(0) + c.index = c.adjustSeekIndex(0) +} + +func (c *CommonTokenStream) GetTokenSource() TokenSource { + return c.tokenSource +} + +// SetTokenSource resets the c token stream by setting its token source. +func (c *CommonTokenStream) SetTokenSource(tokenSource TokenSource) { + c.tokenSource = tokenSource + c.tokens = make([]Token, 0) + c.index = -1 +} + +// NextTokenOnChannel returns the index of the next token on channel given a +// starting index. Returns i if tokens[i] is on channel. Returns -1 if there are +// no tokens on channel between i and EOF. +func (c *CommonTokenStream) NextTokenOnChannel(i, channel int) int { + c.Sync(i) + + if i >= len(c.tokens) { + return -1 + } + + token := c.tokens[i] + + for token.GetChannel() != c.channel { + if token.GetTokenType() == TokenEOF { + return -1 + } + + i++ + c.Sync(i) + token = c.tokens[i] + } + + return i +} + +// previousTokenOnChannel returns the index of the previous token on channel +// given a starting index. Returns i if tokens[i] is on channel. Returns -1 if +// there are no tokens on channel between i and 0. +func (c *CommonTokenStream) previousTokenOnChannel(i, channel int) int { + for i >= 0 && c.tokens[i].GetChannel() != channel { + i-- + } + + return i +} + +// GetHiddenTokensToRight collects all tokens on a specified channel to the +// right of the current token up until we see a token on DEFAULT_TOKEN_CHANNEL +// or EOF. If channel is -1, it finds any non-default channel token. +func (c *CommonTokenStream) GetHiddenTokensToRight(tokenIndex, channel int) []Token { + c.lazyInit() + + if tokenIndex < 0 || tokenIndex >= len(c.tokens) { + panic(strconv.Itoa(tokenIndex) + " not in 0.." + strconv.Itoa(len(c.tokens)-1)) + } + + nextOnChannel := c.NextTokenOnChannel(tokenIndex+1, LexerDefaultTokenChannel) + from := tokenIndex + 1 + + // If no onchannel to the right, then nextOnChannel == -1, so set to to last token + var to int + + if nextOnChannel == -1 { + to = len(c.tokens) - 1 + } else { + to = nextOnChannel + } + + return c.filterForChannel(from, to, channel) +} + +// GetHiddenTokensToLeft collects all tokens on channel to the left of the +// current token until we see a token on DEFAULT_TOKEN_CHANNEL. If channel is +// -1, it finds any non default channel token. +func (c *CommonTokenStream) GetHiddenTokensToLeft(tokenIndex, channel int) []Token { + c.lazyInit() + + if tokenIndex < 0 || tokenIndex >= len(c.tokens) { + panic(strconv.Itoa(tokenIndex) + " not in 0.." + strconv.Itoa(len(c.tokens)-1)) + } + + prevOnChannel := c.previousTokenOnChannel(tokenIndex-1, LexerDefaultTokenChannel) + + if prevOnChannel == tokenIndex-1 { + return nil + } + + // If there are none on channel to the left and prevOnChannel == -1 then from = 0 + from := prevOnChannel + 1 + to := tokenIndex - 1 + + return c.filterForChannel(from, to, channel) +} + +func (c *CommonTokenStream) filterForChannel(left, right, channel int) []Token { + hidden := make([]Token, 0) + + for i := left; i < right+1; i++ { + t := c.tokens[i] + + if channel == -1 { + if t.GetChannel() != LexerDefaultTokenChannel { + hidden = append(hidden, t) + } + } else if t.GetChannel() == channel { + hidden = append(hidden, t) + } + } + + if len(hidden) == 0 { + return nil + } + + return hidden +} + +func (c *CommonTokenStream) GetSourceName() string { + return c.tokenSource.GetSourceName() +} + +func (c *CommonTokenStream) Size() int { + return len(c.tokens) +} + +func (c *CommonTokenStream) Index() int { + return c.index +} + +func (c *CommonTokenStream) GetAllText() string { + return c.GetTextFromInterval(nil) +} + +func (c *CommonTokenStream) GetTextFromTokens(start, end Token) string { + if start == nil || end == nil { + return "" + } + + return c.GetTextFromInterval(NewInterval(start.GetTokenIndex(), end.GetTokenIndex())) +} + +func (c *CommonTokenStream) GetTextFromRuleContext(interval RuleContext) string { + return c.GetTextFromInterval(interval.GetSourceInterval()) +} + +func (c *CommonTokenStream) GetTextFromInterval(interval *Interval) string { + c.lazyInit() + + if interval == nil { + c.Fill() + interval = NewInterval(0, len(c.tokens)-1) + } else { + c.Sync(interval.Stop) + } + + start := interval.Start + stop := interval.Stop + + if start < 0 || stop < 0 { + return "" + } + + if stop >= len(c.tokens) { + stop = len(c.tokens) - 1 + } + + s := "" + + for i := start; i < stop+1; i++ { + t := c.tokens[i] + + if t.GetTokenType() == TokenEOF { + break + } + + s += t.GetText() + } + + return s +} + +// Fill gets all tokens from the lexer until EOF. +func (c *CommonTokenStream) Fill() { + c.lazyInit() + + for c.fetch(1000) == 1000 { + continue + } +} + +func (c *CommonTokenStream) adjustSeekIndex(i int) int { + return c.NextTokenOnChannel(i, c.channel) +} + +func (c *CommonTokenStream) LB(k int) Token { + if k == 0 || c.index-k < 0 { + return nil + } + + i := c.index + n := 1 + + // Find k good tokens looking backward + for n <= k { + // Skip off-channel tokens + i = c.previousTokenOnChannel(i-1, c.channel) + n++ + } + + if i < 0 { + return nil + } + + return c.tokens[i] +} + +func (c *CommonTokenStream) LT(k int) Token { + c.lazyInit() + + if k == 0 { + return nil + } + + if k < 0 { + return c.LB(-k) + } + + i := c.index + n := 1 // We know tokens[n] is valid + + // Find k good tokens + for n < k { + // Skip off-channel tokens, but make sure to not look past EOF + if c.Sync(i + 1) { + i = c.NextTokenOnChannel(i+1, c.channel) + } + + n++ + } + + return c.tokens[i] +} + +// getNumberOfOnChannelTokens counts EOF once. +func (c *CommonTokenStream) getNumberOfOnChannelTokens() int { + var n int + + c.Fill() + + for i := 0; i < len(c.tokens); i++ { + t := c.tokens[i] + + if t.GetChannel() == c.channel { + n++ + } + + if t.GetTokenType() == TokenEOF { + break + } + } + + return n +} diff --git a/runtime/Go/antlr/v4/common_token_stream_test.go b/runtime/Go/antlr/v4/common_token_stream_test.go new file mode 100644 index 0000000000..e7c75d49b1 --- /dev/null +++ b/runtime/Go/antlr/v4/common_token_stream_test.go @@ -0,0 +1,178 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "testing" +) + +type commonTokenStreamTestLexer struct { + *BaseLexer + + tokens []Token + i int +} + +func (l *commonTokenStreamTestLexer) NextToken() Token { + tmp := l.tokens[l.i] + l.i++ + return tmp +} + +func TestCommonTokenStreamOffChannel(t *testing.T) { + assert := assertNew(t) + lexEngine := &commonTokenStreamTestLexer{ + tokens: []Token{ + newTestCommonToken(1, " ", LexerHidden), // 0 + newTestCommonToken(1, "x", LexerDefaultTokenChannel), // 1 + newTestCommonToken(1, " ", LexerHidden), // 2 + newTestCommonToken(1, "=", LexerDefaultTokenChannel), // 3 + newTestCommonToken(1, "34", LexerDefaultTokenChannel), // 4 + newTestCommonToken(1, " ", LexerHidden), // 5 + newTestCommonToken(1, " ", LexerHidden), // 6 + newTestCommonToken(1, ";", LexerDefaultTokenChannel), // 7 + newTestCommonToken(1, "\n", LexerHidden), // 9 + newTestCommonToken(TokenEOF, "", LexerDefaultTokenChannel), // 10 + }, + } + tokens := NewCommonTokenStream(lexEngine, TokenDefaultChannel) + + assert.Equal("x", tokens.LT(1).GetText()) // must skip first off channel token + tokens.Consume() + assert.Equal("=", tokens.LT(1).GetText()) + assert.Equal("x", tokens.LT(-1).GetText()) + + tokens.Consume() + assert.Equal("34", tokens.LT(1).GetText()) + assert.Equal("=", tokens.LT(-1).GetText()) + + tokens.Consume() + assert.Equal(";", tokens.LT(1).GetText()) + assert.Equal("34", tokens.LT(-1).GetText()) + + tokens.Consume() + assert.Equal(TokenEOF, tokens.LT(1).GetTokenType()) + assert.Equal(";", tokens.LT(-1).GetText()) + + assert.Equal("34", tokens.LT(-2).GetText()) + assert.Equal("=", tokens.LT(-3).GetText()) + assert.Equal("x", tokens.LT(-4).GetText()) +} + +func TestCommonTokenStreamFetchOffChannel(t *testing.T) { + assert := assertNew(t) + lexEngine := &commonTokenStreamTestLexer{ + tokens: []Token{ + newTestCommonToken(1, " ", LexerHidden), // 0 + newTestCommonToken(1, "x", LexerDefaultTokenChannel), // 1 + newTestCommonToken(1, " ", LexerHidden), // 2 + newTestCommonToken(1, "=", LexerDefaultTokenChannel), // 3 + newTestCommonToken(1, "34", LexerDefaultTokenChannel), // 4 + newTestCommonToken(1, " ", LexerHidden), // 5 + newTestCommonToken(1, " ", LexerHidden), // 6 + newTestCommonToken(1, ";", LexerDefaultTokenChannel), // 7 + newTestCommonToken(1, " ", LexerHidden), // 8 + newTestCommonToken(1, "\n", LexerHidden), // 9 + newTestCommonToken(TokenEOF, "", LexerDefaultTokenChannel), // 10 + }, + } + tokens := NewCommonTokenStream(lexEngine, TokenDefaultChannel) + tokens.Fill() + + assert.Nil(tokens.GetHiddenTokensToLeft(0, -1)) + assert.Nil(tokens.GetHiddenTokensToRight(0, -1)) + + assert.Equal("[[@0,0:0=' ',<1>,channel=1,0:-1]]", tokensToString(tokens.GetHiddenTokensToLeft(1, -1))) + assert.Equal("[[@2,0:0=' ',<1>,channel=1,0:-1]]", tokensToString(tokens.GetHiddenTokensToRight(1, -1))) + + assert.Nil(tokens.GetHiddenTokensToLeft(2, -1)) + assert.Nil(tokens.GetHiddenTokensToRight(2, -1)) + + assert.Equal("[[@2,0:0=' ',<1>,channel=1,0:-1]]", tokensToString(tokens.GetHiddenTokensToLeft(3, -1))) + assert.Nil(tokens.GetHiddenTokensToRight(3, -1)) + + assert.Nil(tokens.GetHiddenTokensToLeft(4, -1)) + assert.Equal("[[@5,0:0=' ',<1>,channel=1,0:-1], [@6,0:0=' ',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToRight(4, -1))) + + assert.Nil(tokens.GetHiddenTokensToLeft(5, -1)) + assert.Equal("[[@6,0:0=' ',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToRight(5, -1))) + + assert.Equal("[[@5,0:0=' ',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToLeft(6, -1))) + assert.Nil(tokens.GetHiddenTokensToRight(6, -1)) + + assert.Equal("[[@5,0:0=' ',<1>,channel=1,0:-1], [@6,0:0=' ',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToLeft(7, -1))) + assert.Equal("[[@8,0:0=' ',<1>,channel=1,0:-1], [@9,0:0='\\n',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToRight(7, -1))) + + assert.Nil(tokens.GetHiddenTokensToLeft(8, -1)) + assert.Equal("[[@9,0:0='\\n',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToRight(8, -1))) + + assert.Equal("[[@8,0:0=' ',<1>,channel=1,0:-1]]", + tokensToString(tokens.GetHiddenTokensToLeft(9, -1))) + assert.Nil(tokens.GetHiddenTokensToRight(9, -1)) + +} + +type commonTokenStreamTestLexerSingleEOF struct { + *BaseLexer + + tokens []Token + i int +} + +func (l *commonTokenStreamTestLexerSingleEOF) NextToken() Token { + return newTestCommonToken(TokenEOF, "", LexerDefaultTokenChannel) +} + +func TestCommonTokenStreamSingleEOF(t *testing.T) { + assert := assertNew(t) + lexEngine := &commonTokenStreamTestLexerSingleEOF{} + tokens := NewCommonTokenStream(lexEngine, TokenDefaultChannel) + tokens.Fill() + + assert.Equal(TokenEOF, tokens.LA(1)) + assert.Equal(0, tokens.index) + assert.Equal(1, tokens.Size()) +} + +func TestCommonTokenStreamCannotConsumeEOF(t *testing.T) { + assert := assertNew(t) + lexEngine := &commonTokenStreamTestLexerSingleEOF{} + tokens := NewCommonTokenStream(lexEngine, TokenDefaultChannel) + tokens.Fill() + assert.Equal(TokenEOF, tokens.LA(1)) + assert.Equal(0, tokens.index) + assert.Equal(1, tokens.Size()) + assert.Panics(tokens.Consume) +} + +func TestCommonTokenStreamGetTextFromInterval(t *testing.T) { + assert := assertNew(t) + lexEngine := &commonTokenStreamTestLexer{ + tokens: []Token{ + newTestCommonToken(1, " ", LexerHidden), // 0 + newTestCommonToken(1, "x", LexerDefaultTokenChannel), // 1 + newTestCommonToken(1, " ", LexerHidden), // 2 + newTestCommonToken(1, "=", LexerDefaultTokenChannel), // 3 + newTestCommonToken(1, "34", LexerDefaultTokenChannel), // 4 + newTestCommonToken(1, " ", LexerHidden), // 5 + newTestCommonToken(1, " ", LexerHidden), // 6 + newTestCommonToken(1, ";", LexerDefaultTokenChannel), // 7 + newTestCommonToken(1, " ", LexerHidden), // 8 + newTestCommonToken(1, "\n", LexerHidden), // 9 + newTestCommonToken(TokenEOF, "", LexerDefaultTokenChannel), // 10 + }, + } + tokens := NewCommonTokenStream(lexEngine, TokenDefaultChannel) + assert.Equal("x", tokens.GetTextFromInterval(&Interval{Start: 1, Stop: 1})) + assert.Equal(len(tokens.tokens), 2) + assert.Equal(" x =34 ; \n", tokens.GetTextFromInterval(nil)) + assert.Equal(len(tokens.tokens), 11) +} diff --git a/runtime/Go/antlr/v4/comparators.go b/runtime/Go/antlr/v4/comparators.go new file mode 100644 index 0000000000..fbe76c33e0 --- /dev/null +++ b/runtime/Go/antlr/v4/comparators.go @@ -0,0 +1,137 @@ +package antlr + +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +// This file contains all the implementations of custom comparators used for generic collections when the +// Hash() and Equals() funcs supplied by the struct objects themselves need to be overridden. Normally, we would +// put the comparators in the source file for the struct themselves, but given the organization of this code is +// sorta kinda based upon the Java code, I found it confusing trying to find out which comparator was where and used by +// which instantiation of a collection. For instance, an Array2DHashSet in the Java source, when used with ATNConfig +// collections requires three different comparators depending on what the collection is being used for. Collecting - pun intended - +// all the comparators here, makes it much easier to see which implementation of hash and equals is used by which collection. +// It also makes it easy to verify that the Hash() and Equals() functions marry up with the Java implementations. + +// ObjEqComparator is the equivalent of the Java ObjectEqualityComparator, which is the default instance of +// Equality comparator. We do not have inheritance in Go, only interfaces, so we use generics to enforce some +// type safety and avoid having to implement this for every type that we want to perform comparison on. +// +// This comparator works by using the standard Hash() and Equals() methods of the type T that is being compared. Which +// allows us to use it in any collection instance that does nto require a special hash or equals implementation. +type ObjEqComparator[T Collectable[T]] struct{} + +// Equals2 delegates to the Equals() method of type T +func (c *ObjEqComparator[T]) Equals2(o1, o2 T) bool { + return o1.Equals(o2) +} + +// Hash1 delegates to the Hash() method of type T +func (c *ObjEqComparator[T]) Hash1(o T) int { + + return o.Hash() +} + +type SemCComparator[T Collectable[T]] struct{} + +// ATNConfigComparator is used as the compartor for the configLookup field of an ATNConfigSet +// and has a custom Equals() and Hash() implementation, because equality is not based on the +// standard Hash() and Equals() methods of the ATNConfig type. +type ATNConfigComparator[T Collectable[T]] struct { +} + +// Equals2 is a custom comparator for ATNConfigs specifically for configLookup +func (c *ATNConfigComparator[T]) Equals2(o1, o2 ATNConfig) bool { + + // Same pointer, must be equal, even if both nil + // + if o1 == o2 { + return true + + } + + // If either are nil, but not both, then the result is false + // + if o1 == nil || o2 == nil { + return false + } + + return o1.GetState().GetStateNumber() == o2.GetState().GetStateNumber() && + o1.GetAlt() == o2.GetAlt() && + o1.GetSemanticContext().Equals(o2.GetSemanticContext()) +} + +// Hash1 is custom hash implementation for ATNConfigs specifically for configLookup +func (c *ATNConfigComparator[T]) Hash1(o ATNConfig) int { + hash := 7 + hash = 31*hash + o.GetState().GetStateNumber() + hash = 31*hash + o.GetAlt() + hash = 31*hash + o.GetSemanticContext().Hash() + return hash +} + +// ATNAltConfigComparator is used as the comparator for mapping configs to Alt Bitsets +type ATNAltConfigComparator[T Collectable[T]] struct { +} + +// Equals2 is a custom comparator for ATNConfigs specifically for configLookup +func (c *ATNAltConfigComparator[T]) Equals2(o1, o2 ATNConfig) bool { + + // Same pointer, must be equal, even if both nil + // + if o1 == o2 { + return true + + } + + // If either are nil, but not both, then the result is false + // + if o1 == nil || o2 == nil { + return false + } + + return o1.GetState().GetStateNumber() == o2.GetState().GetStateNumber() && + o1.GetContext().Equals(o2.GetContext()) +} + +// Hash1 is custom hash implementation for ATNConfigs specifically for configLookup +func (c *ATNAltConfigComparator[T]) Hash1(o ATNConfig) int { + h := murmurInit(7) + h = murmurUpdate(h, o.GetState().GetStateNumber()) + h = murmurUpdate(h, o.GetContext().Hash()) + return murmurFinish(h, 2) +} + +// BaseATNConfigComparator is used as the comparator for the configLookup field of a BaseATNConfigSet +// and has a custom Equals() and Hash() implementation, because equality is not based on the +// standard Hash() and Equals() methods of the ATNConfig type. +type BaseATNConfigComparator[T Collectable[T]] struct { +} + +// Equals2 is a custom comparator for ATNConfigs specifically for baseATNConfigSet +func (c *BaseATNConfigComparator[T]) Equals2(o1, o2 ATNConfig) bool { + + // Same pointer, must be equal, even if both nil + // + if o1 == o2 { + return true + + } + + // If either are nil, but not both, then the result is false + // + if o1 == nil || o2 == nil { + return false + } + + return o1.GetState().GetStateNumber() == o2.GetState().GetStateNumber() && + o1.GetAlt() == o2.GetAlt() && + o1.GetSemanticContext().Equals(o2.GetSemanticContext()) +} + +// Hash1 is custom hash implementation for ATNConfigs specifically for configLookup, but in fact just +// delegates to the standard Hash() method of the ATNConfig type. +func (c *BaseATNConfigComparator[T]) Hash1(o ATNConfig) int { + + return o.Hash() +} diff --git a/runtime/Go/antlr/v4/dfa.go b/runtime/Go/antlr/v4/dfa.go new file mode 100644 index 0000000000..5326baff95 --- /dev/null +++ b/runtime/Go/antlr/v4/dfa.go @@ -0,0 +1,148 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type DFA struct { + // atnStartState is the ATN state in which this was created + atnStartState DecisionState + + decision int + + // states is all the DFA states. Use Map to get the old state back; Set can only + // indicate whether it is there. Go maps implement key hash collisions and so on and are very + // good, but the DFAState is an object and can't be used directly as the key as it can in say JAva + // amd C#, whereby if the hashcode is the same for two objects, then Equals() is called against them + // to see if they really are the same object. + // + // + states *JStore[*DFAState, *ObjEqComparator[*DFAState]] + + numstates int + + s0 *DFAState + + // precedenceDfa is the backing field for isPrecedenceDfa and setPrecedenceDfa. + // True if the DFA is for a precedence decision and false otherwise. + precedenceDfa bool +} + +func NewDFA(atnStartState DecisionState, decision int) *DFA { + dfa := &DFA{ + atnStartState: atnStartState, + decision: decision, + states: NewJStore[*DFAState, *ObjEqComparator[*DFAState]](&ObjEqComparator[*DFAState]{}), + } + if s, ok := atnStartState.(*StarLoopEntryState); ok && s.precedenceRuleDecision { + dfa.precedenceDfa = true + dfa.s0 = NewDFAState(-1, NewBaseATNConfigSet(false)) + dfa.s0.isAcceptState = false + dfa.s0.requiresFullContext = false + } + return dfa +} + +// getPrecedenceStartState gets the start state for the current precedence and +// returns the start state corresponding to the specified precedence if a start +// state exists for the specified precedence and nil otherwise. d must be a +// precedence DFA. See also isPrecedenceDfa. +func (d *DFA) getPrecedenceStartState(precedence int) *DFAState { + if !d.getPrecedenceDfa() { + panic("only precedence DFAs may contain a precedence start state") + } + + // s0.edges is never nil for a precedence DFA + if precedence < 0 || precedence >= len(d.getS0().getEdges()) { + return nil + } + + return d.getS0().getIthEdge(precedence) +} + +// setPrecedenceStartState sets the start state for the current precedence. d +// must be a precedence DFA. See also isPrecedenceDfa. +func (d *DFA) setPrecedenceStartState(precedence int, startState *DFAState) { + if !d.getPrecedenceDfa() { + panic("only precedence DFAs may contain a precedence start state") + } + + if precedence < 0 { + return + } + + // Synchronization on s0 here is ok. When the DFA is turned into a + // precedence DFA, s0 will be initialized once and not updated again. s0.edges + // is never nil for a precedence DFA. + s0 := d.getS0() + if precedence >= s0.numEdges() { + edges := append(s0.getEdges(), make([]*DFAState, precedence+1-s0.numEdges())...) + s0.setEdges(edges) + d.setS0(s0) + } + + s0.setIthEdge(precedence, startState) +} + +func (d *DFA) getPrecedenceDfa() bool { + return d.precedenceDfa +} + +// setPrecedenceDfa sets whether d is a precedence DFA. If precedenceDfa differs +// from the current DFA configuration, then d.states is cleared, the initial +// state s0 is set to a new DFAState with an empty outgoing DFAState.edges to +// store the start states for individual precedence values if precedenceDfa is +// true or nil otherwise, and d.precedenceDfa is updated. +func (d *DFA) setPrecedenceDfa(precedenceDfa bool) { + if d.getPrecedenceDfa() != precedenceDfa { + d.states = NewJStore[*DFAState, *ObjEqComparator[*DFAState]](&ObjEqComparator[*DFAState]{}) + d.numstates = 0 + + if precedenceDfa { + precedenceState := NewDFAState(-1, NewBaseATNConfigSet(false)) + + precedenceState.setEdges(make([]*DFAState, 0)) + precedenceState.isAcceptState = false + precedenceState.requiresFullContext = false + d.setS0(precedenceState) + } else { + d.setS0(nil) + } + + d.precedenceDfa = precedenceDfa + } +} + +func (d *DFA) getS0() *DFAState { + return d.s0 +} + +func (d *DFA) setS0(s *DFAState) { + d.s0 = s +} + +// sortedStates returns the states in d sorted by their state number. +func (d *DFA) sortedStates() []*DFAState { + + vs := d.states.SortedSlice(func(i, j *DFAState) bool { + return i.stateNumber < j.stateNumber + }) + + return vs +} + +func (d *DFA) String(literalNames []string, symbolicNames []string) string { + if d.getS0() == nil { + return "" + } + + return NewDFASerializer(d, literalNames, symbolicNames).String() +} + +func (d *DFA) ToLexerString() string { + if d.getS0() == nil { + return "" + } + + return NewLexerDFASerializer(d).String() +} diff --git a/runtime/Go/antlr/v4/dfa_serializer.go b/runtime/Go/antlr/v4/dfa_serializer.go new file mode 100644 index 0000000000..84d0a31e53 --- /dev/null +++ b/runtime/Go/antlr/v4/dfa_serializer.go @@ -0,0 +1,158 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" + "strings" +) + +// DFASerializer is a DFA walker that knows how to dump them to serialized +// strings. +type DFASerializer struct { + dfa *DFA + literalNames []string + symbolicNames []string +} + +func NewDFASerializer(dfa *DFA, literalNames, symbolicNames []string) *DFASerializer { + if literalNames == nil { + literalNames = make([]string, 0) + } + + if symbolicNames == nil { + symbolicNames = make([]string, 0) + } + + return &DFASerializer{ + dfa: dfa, + literalNames: literalNames, + symbolicNames: symbolicNames, + } +} + +func (d *DFASerializer) String() string { + if d.dfa.getS0() == nil { + return "" + } + + buf := "" + states := d.dfa.sortedStates() + + for _, s := range states { + if s.edges != nil { + n := len(s.edges) + + for j := 0; j < n; j++ { + t := s.edges[j] + + if t != nil && t.stateNumber != 0x7FFFFFFF { + buf += d.GetStateString(s) + buf += "-" + buf += d.getEdgeLabel(j) + buf += "->" + buf += d.GetStateString(t) + buf += "\n" + } + } + } + } + + if len(buf) == 0 { + return "" + } + + return buf +} + +func (d *DFASerializer) getEdgeLabel(i int) string { + if i == 0 { + return "EOF" + } else if d.literalNames != nil && i-1 < len(d.literalNames) { + return d.literalNames[i-1] + } else if d.symbolicNames != nil && i-1 < len(d.symbolicNames) { + return d.symbolicNames[i-1] + } + + return strconv.Itoa(i - 1) +} + +func (d *DFASerializer) GetStateString(s *DFAState) string { + var a, b string + + if s.isAcceptState { + a = ":" + } + + if s.requiresFullContext { + b = "^" + } + + baseStateStr := a + "s" + strconv.Itoa(s.stateNumber) + b + + if s.isAcceptState { + if s.predicates != nil { + return baseStateStr + "=>" + fmt.Sprint(s.predicates) + } + + return baseStateStr + "=>" + fmt.Sprint(s.prediction) + } + + return baseStateStr +} + +type LexerDFASerializer struct { + *DFASerializer +} + +func NewLexerDFASerializer(dfa *DFA) *LexerDFASerializer { + return &LexerDFASerializer{DFASerializer: NewDFASerializer(dfa, nil, nil)} +} + +func (l *LexerDFASerializer) getEdgeLabel(i int) string { + var sb strings.Builder + sb.Grow(6) + sb.WriteByte('\'') + sb.WriteRune(rune(i)) + sb.WriteByte('\'') + return sb.String() +} + +func (l *LexerDFASerializer) String() string { + if l.dfa.getS0() == nil { + return "" + } + + buf := "" + states := l.dfa.sortedStates() + + for i := 0; i < len(states); i++ { + s := states[i] + + if s.edges != nil { + n := len(s.edges) + + for j := 0; j < n; j++ { + t := s.edges[j] + + if t != nil && t.stateNumber != 0x7FFFFFFF { + buf += l.GetStateString(s) + buf += "-" + buf += l.getEdgeLabel(j) + buf += "->" + buf += l.GetStateString(t) + buf += "\n" + } + } + } + } + + if len(buf) == 0 { + return "" + } + + return buf +} diff --git a/runtime/Go/antlr/v4/dfa_state.go b/runtime/Go/antlr/v4/dfa_state.go new file mode 100644 index 0000000000..c90dec55c8 --- /dev/null +++ b/runtime/Go/antlr/v4/dfa_state.go @@ -0,0 +1,169 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" +) + +// PredPrediction maps a predicate to a predicted alternative. +type PredPrediction struct { + alt int + pred SemanticContext +} + +func NewPredPrediction(pred SemanticContext, alt int) *PredPrediction { + return &PredPrediction{alt: alt, pred: pred} +} + +func (p *PredPrediction) String() string { + return "(" + fmt.Sprint(p.pred) + ", " + fmt.Sprint(p.alt) + ")" +} + +// DFAState represents a set of possible ATN configurations. As Aho, Sethi, +// Ullman p. 117 says: "The DFA uses its state to keep track of all possible +// states the ATN can be in after reading each input symbol. That is to say, +// after reading input a1a2..an, the DFA is in a state that represents the +// subset T of the states of the ATN that are reachable from the ATN's start +// state along some path labeled a1a2..an." In conventional NFA-to-DFA +// conversion, therefore, the subset T would be a bitset representing the set of +// states the ATN could be in. We need to track the alt predicted by each state +// as well, however. More importantly, we need to maintain a stack of states, +// tracking the closure operations as they jump from rule to rule, emulating +// rule invocations (method calls). I have to add a stack to simulate the proper +// lookahead sequences for the underlying LL grammar from which the ATN was +// derived. +// +// I use a set of ATNConfig objects, not simple states. An ATNConfig is both a +// state (ala normal conversion) and a RuleContext describing the chain of rules +// (if any) followed to arrive at that state. +// +// A DFAState may have multiple references to a particular state, but with +// different ATN contexts (with same or different alts) meaning that state was +// reached via a different set of rule invocations. +type DFAState struct { + stateNumber int + configs ATNConfigSet + + // edges elements point to the target of the symbol. Shift up by 1 so (-1) + // Token.EOF maps to the first element. + edges []*DFAState + + isAcceptState bool + + // prediction is the ttype we match or alt we predict if the state is accept. + // Set to ATN.INVALID_ALT_NUMBER when predicates != nil or + // requiresFullContext. + prediction int + + lexerActionExecutor *LexerActionExecutor + + // requiresFullContext indicates it was created during an SLL prediction that + // discovered a conflict between the configurations in the state. Future + // ParserATNSimulator.execATN invocations immediately jump doing + // full context prediction if true. + requiresFullContext bool + + // predicates is the predicates associated with the ATN configurations of the + // DFA state during SLL parsing. When we have predicates, requiresFullContext + // is false, since full context prediction evaluates predicates on-the-fly. If + // d is + // not nil, then prediction is ATN.INVALID_ALT_NUMBER. + // + // We only use these for non-requiresFullContext but conflicting states. That + // means we know from the context (it's $ or we don't dip into outer context) + // that it's an ambiguity not a conflict. + // + // This list is computed by + // ParserATNSimulator.predicateDFAState. + predicates []*PredPrediction +} + +func NewDFAState(stateNumber int, configs ATNConfigSet) *DFAState { + if configs == nil { + configs = NewBaseATNConfigSet(false) + } + + return &DFAState{configs: configs, stateNumber: stateNumber} +} + +// GetAltSet gets the set of all alts mentioned by all ATN configurations in d. +func (d *DFAState) GetAltSet() []int { + var alts []int + + if d.configs != nil { + for _, c := range d.configs.GetItems() { + alts = append(alts, c.GetAlt()) + } + } + + if len(alts) == 0 { + return nil + } + + return alts +} + +func (d *DFAState) getEdges() []*DFAState { + return d.edges +} + +func (d *DFAState) numEdges() int { + return len(d.edges) +} + +func (d *DFAState) getIthEdge(i int) *DFAState { + return d.edges[i] +} + +func (d *DFAState) setEdges(newEdges []*DFAState) { + d.edges = newEdges +} + +func (d *DFAState) setIthEdge(i int, edge *DFAState) { + d.edges[i] = edge +} + +func (d *DFAState) setPrediction(v int) { + d.prediction = v +} + +func (d *DFAState) String() string { + var s string + if d.isAcceptState { + if d.predicates != nil { + s = "=>" + fmt.Sprint(d.predicates) + } else { + s = "=>" + fmt.Sprint(d.prediction) + } + } + + return fmt.Sprintf("%d:%s%s", d.stateNumber, fmt.Sprint(d.configs), s) +} + +func (d *DFAState) Hash() int { + h := murmurInit(7) + h = murmurUpdate(h, d.configs.Hash()) + return murmurFinish(h, 1) +} + +// Equals returns whether d equals other. Two DFAStates are equal if their ATN +// configuration sets are the same. This method is used to see if a state +// already exists. +// +// Because the number of alternatives and number of ATN configurations are +// finite, there is a finite number of DFA states that can be processed. This is +// necessary to show that the algorithm terminates. +// +// Cannot test the DFA state numbers here because in +// ParserATNSimulator.addDFAState we need to know if any other state exists that +// has d exact set of ATN configurations. The stateNumber is irrelevant. +func (d *DFAState) Equals(o Collectable[*DFAState]) bool { + if d == o { + return true + } + + return d.configs.Equals(o.(*DFAState).configs) +} diff --git a/runtime/Go/antlr/v4/diagnostic_error_listener.go b/runtime/Go/antlr/v4/diagnostic_error_listener.go new file mode 100644 index 0000000000..c55bcc19b2 --- /dev/null +++ b/runtime/Go/antlr/v4/diagnostic_error_listener.go @@ -0,0 +1,109 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "strconv" +) + +// +// This implementation of {@link ANTLRErrorListener} can be used to identify +// certain potential correctness and performance problems in grammars. "reports" +// are made by calling {@link Parser//NotifyErrorListeners} with the appropriate +// message. +// +//
    +//
  • Ambiguities: These are cases where more than one path through the +// grammar can Match the input.
  • +//
  • Weak context sensitivity: These are cases where full-context +// prediction resolved an SLL conflict to a unique alternative which equaled the +// minimum alternative of the SLL conflict.
  • +//
  • Strong (forced) context sensitivity: These are cases where the +// full-context prediction resolved an SLL conflict to a unique alternative, +// and the minimum alternative of the SLL conflict was found to not be +// a truly viable alternative. Two-stage parsing cannot be used for inputs where +// d situation occurs.
  • +//
+ +type DiagnosticErrorListener struct { + *DefaultErrorListener + + exactOnly bool +} + +func NewDiagnosticErrorListener(exactOnly bool) *DiagnosticErrorListener { + + n := new(DiagnosticErrorListener) + + // whether all ambiguities or only exact ambiguities are Reported. + n.exactOnly = exactOnly + return n +} + +func (d *DiagnosticErrorListener) ReportAmbiguity(recognizer Parser, dfa *DFA, startIndex, stopIndex int, exact bool, ambigAlts *BitSet, configs ATNConfigSet) { + if d.exactOnly && !exact { + return + } + msg := "reportAmbiguity d=" + + d.getDecisionDescription(recognizer, dfa) + + ": ambigAlts=" + + d.getConflictingAlts(ambigAlts, configs).String() + + ", input='" + + recognizer.GetTokenStream().GetTextFromInterval(NewInterval(startIndex, stopIndex)) + "'" + recognizer.NotifyErrorListeners(msg, nil, nil) +} + +func (d *DiagnosticErrorListener) ReportAttemptingFullContext(recognizer Parser, dfa *DFA, startIndex, stopIndex int, conflictingAlts *BitSet, configs ATNConfigSet) { + + msg := "reportAttemptingFullContext d=" + + d.getDecisionDescription(recognizer, dfa) + + ", input='" + + recognizer.GetTokenStream().GetTextFromInterval(NewInterval(startIndex, stopIndex)) + "'" + recognizer.NotifyErrorListeners(msg, nil, nil) +} + +func (d *DiagnosticErrorListener) ReportContextSensitivity(recognizer Parser, dfa *DFA, startIndex, stopIndex, prediction int, configs ATNConfigSet) { + msg := "reportContextSensitivity d=" + + d.getDecisionDescription(recognizer, dfa) + + ", input='" + + recognizer.GetTokenStream().GetTextFromInterval(NewInterval(startIndex, stopIndex)) + "'" + recognizer.NotifyErrorListeners(msg, nil, nil) +} + +func (d *DiagnosticErrorListener) getDecisionDescription(recognizer Parser, dfa *DFA) string { + decision := dfa.decision + ruleIndex := dfa.atnStartState.GetRuleIndex() + + ruleNames := recognizer.GetRuleNames() + if ruleIndex < 0 || ruleIndex >= len(ruleNames) { + return strconv.Itoa(decision) + } + ruleName := ruleNames[ruleIndex] + if ruleName == "" { + return strconv.Itoa(decision) + } + return strconv.Itoa(decision) + " (" + ruleName + ")" +} + +// Computes the set of conflicting or ambiguous alternatives from a +// configuration set, if that information was not already provided by the +// parser. +// +// @param ReportedAlts The set of conflicting or ambiguous alternatives, as +// Reported by the parser. +// @param configs The conflicting or ambiguous configuration set. +// @return Returns {@code ReportedAlts} if it is not {@code nil}, otherwise +// returns the set of alternatives represented in {@code configs}. +func (d *DiagnosticErrorListener) getConflictingAlts(ReportedAlts *BitSet, set ATNConfigSet) *BitSet { + if ReportedAlts != nil { + return ReportedAlts + } + result := NewBitSet() + for _, c := range set.GetItems() { + result.add(c.GetAlt()) + } + + return result +} diff --git a/runtime/Go/antlr/v4/error_listener.go b/runtime/Go/antlr/v4/error_listener.go new file mode 100644 index 0000000000..f679f0dcd5 --- /dev/null +++ b/runtime/Go/antlr/v4/error_listener.go @@ -0,0 +1,104 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "os" + "strconv" +) + +// Provides an empty default implementation of {@link ANTLRErrorListener}. The +// default implementation of each method does nothing, but can be overridden as +// necessary. + +type ErrorListener interface { + SyntaxError(recognizer Recognizer, offendingSymbol interface{}, line, column int, msg string, e RecognitionException) + ReportAmbiguity(recognizer Parser, dfa *DFA, startIndex, stopIndex int, exact bool, ambigAlts *BitSet, configs ATNConfigSet) + ReportAttemptingFullContext(recognizer Parser, dfa *DFA, startIndex, stopIndex int, conflictingAlts *BitSet, configs ATNConfigSet) + ReportContextSensitivity(recognizer Parser, dfa *DFA, startIndex, stopIndex, prediction int, configs ATNConfigSet) +} + +type DefaultErrorListener struct { +} + +func NewDefaultErrorListener() *DefaultErrorListener { + return new(DefaultErrorListener) +} + +func (d *DefaultErrorListener) SyntaxError(recognizer Recognizer, offendingSymbol interface{}, line, column int, msg string, e RecognitionException) { +} + +func (d *DefaultErrorListener) ReportAmbiguity(recognizer Parser, dfa *DFA, startIndex, stopIndex int, exact bool, ambigAlts *BitSet, configs ATNConfigSet) { +} + +func (d *DefaultErrorListener) ReportAttemptingFullContext(recognizer Parser, dfa *DFA, startIndex, stopIndex int, conflictingAlts *BitSet, configs ATNConfigSet) { +} + +func (d *DefaultErrorListener) ReportContextSensitivity(recognizer Parser, dfa *DFA, startIndex, stopIndex, prediction int, configs ATNConfigSet) { +} + +type ConsoleErrorListener struct { + *DefaultErrorListener +} + +func NewConsoleErrorListener() *ConsoleErrorListener { + return new(ConsoleErrorListener) +} + +// Provides a default instance of {@link ConsoleErrorListener}. +var ConsoleErrorListenerINSTANCE = NewConsoleErrorListener() + +// {@inheritDoc} +// +//

+// This implementation prints messages to {@link System//err} containing the +// values of {@code line}, {@code charPositionInLine}, and {@code msg} using +// the following format.

+// +//
+// line line:charPositionInLine msg
+// 
+func (c *ConsoleErrorListener) SyntaxError(recognizer Recognizer, offendingSymbol interface{}, line, column int, msg string, e RecognitionException) { + fmt.Fprintln(os.Stderr, "line "+strconv.Itoa(line)+":"+strconv.Itoa(column)+" "+msg) +} + +type ProxyErrorListener struct { + *DefaultErrorListener + delegates []ErrorListener +} + +func NewProxyErrorListener(delegates []ErrorListener) *ProxyErrorListener { + if delegates == nil { + panic("delegates is not provided") + } + l := new(ProxyErrorListener) + l.delegates = delegates + return l +} + +func (p *ProxyErrorListener) SyntaxError(recognizer Recognizer, offendingSymbol interface{}, line, column int, msg string, e RecognitionException) { + for _, d := range p.delegates { + d.SyntaxError(recognizer, offendingSymbol, line, column, msg, e) + } +} + +func (p *ProxyErrorListener) ReportAmbiguity(recognizer Parser, dfa *DFA, startIndex, stopIndex int, exact bool, ambigAlts *BitSet, configs ATNConfigSet) { + for _, d := range p.delegates { + d.ReportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs) + } +} + +func (p *ProxyErrorListener) ReportAttemptingFullContext(recognizer Parser, dfa *DFA, startIndex, stopIndex int, conflictingAlts *BitSet, configs ATNConfigSet) { + for _, d := range p.delegates { + d.ReportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs) + } +} + +func (p *ProxyErrorListener) ReportContextSensitivity(recognizer Parser, dfa *DFA, startIndex, stopIndex, prediction int, configs ATNConfigSet) { + for _, d := range p.delegates { + d.ReportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs) + } +} diff --git a/runtime/Go/antlr/v4/error_strategy.go b/runtime/Go/antlr/v4/error_strategy.go new file mode 100644 index 0000000000..5c0a637ba4 --- /dev/null +++ b/runtime/Go/antlr/v4/error_strategy.go @@ -0,0 +1,734 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "reflect" + "strconv" + "strings" +) + +type ErrorStrategy interface { + reset(Parser) + RecoverInline(Parser) Token + Recover(Parser, RecognitionException) + Sync(Parser) + InErrorRecoveryMode(Parser) bool + ReportError(Parser, RecognitionException) + ReportMatch(Parser) +} + +// This is the default implementation of {@link ANTLRErrorStrategy} used for +// error Reporting and recovery in ANTLR parsers. +type DefaultErrorStrategy struct { + errorRecoveryMode bool + lastErrorIndex int + lastErrorStates *IntervalSet +} + +var _ ErrorStrategy = &DefaultErrorStrategy{} + +func NewDefaultErrorStrategy() *DefaultErrorStrategy { + + d := new(DefaultErrorStrategy) + + // Indicates whether the error strategy is currently "recovering from an + // error". This is used to suppress Reporting multiple error messages while + // attempting to recover from a detected syntax error. + // + // @see //InErrorRecoveryMode + // + d.errorRecoveryMode = false + + // The index into the input stream where the last error occurred. + // This is used to prevent infinite loops where an error is found + // but no token is consumed during recovery...another error is found, + // ad nauseum. This is a failsafe mechanism to guarantee that at least + // one token/tree node is consumed for two errors. + // + d.lastErrorIndex = -1 + d.lastErrorStates = nil + return d +} + +//

The default implementation simply calls {@link //endErrorCondition} to +// ensure that the handler is not in error recovery mode.

+func (d *DefaultErrorStrategy) reset(recognizer Parser) { + d.endErrorCondition(recognizer) +} + +// This method is called to enter error recovery mode when a recognition +// exception is Reported. +// +// @param recognizer the parser instance +func (d *DefaultErrorStrategy) beginErrorCondition(recognizer Parser) { + d.errorRecoveryMode = true +} + +func (d *DefaultErrorStrategy) InErrorRecoveryMode(recognizer Parser) bool { + return d.errorRecoveryMode +} + +// This method is called to leave error recovery mode after recovering from +// a recognition exception. +// +// @param recognizer +func (d *DefaultErrorStrategy) endErrorCondition(recognizer Parser) { + d.errorRecoveryMode = false + d.lastErrorStates = nil + d.lastErrorIndex = -1 +} + +// {@inheritDoc} +// +//

The default implementation simply calls {@link //endErrorCondition}.

+func (d *DefaultErrorStrategy) ReportMatch(recognizer Parser) { + d.endErrorCondition(recognizer) +} + +// {@inheritDoc} +// +//

The default implementation returns immediately if the handler is already +// in error recovery mode. Otherwise, it calls {@link //beginErrorCondition} +// and dispatches the Reporting task based on the runtime type of {@code e} +// according to the following table.

+// +//
    +//
  • {@link NoViableAltException}: Dispatches the call to +// {@link //ReportNoViableAlternative}
  • +//
  • {@link InputMisMatchException}: Dispatches the call to +// {@link //ReportInputMisMatch}
  • +//
  • {@link FailedPredicateException}: Dispatches the call to +// {@link //ReportFailedPredicate}
  • +//
  • All other types: calls {@link Parser//NotifyErrorListeners} to Report +// the exception
  • +//
+func (d *DefaultErrorStrategy) ReportError(recognizer Parser, e RecognitionException) { + // if we've already Reported an error and have not Matched a token + // yet successfully, don't Report any errors. + if d.InErrorRecoveryMode(recognizer) { + return // don't Report spurious errors + } + d.beginErrorCondition(recognizer) + + switch t := e.(type) { + default: + fmt.Println("unknown recognition error type: " + reflect.TypeOf(e).Name()) + // fmt.Println(e.stack) + recognizer.NotifyErrorListeners(e.GetMessage(), e.GetOffendingToken(), e) + case *NoViableAltException: + d.ReportNoViableAlternative(recognizer, t) + case *InputMisMatchException: + d.ReportInputMisMatch(recognizer, t) + case *FailedPredicateException: + d.ReportFailedPredicate(recognizer, t) + } +} + +// {@inheritDoc} +// +//

The default implementation reSynchronizes the parser by consuming tokens +// until we find one in the reSynchronization set--loosely the set of tokens +// that can follow the current rule.

+func (d *DefaultErrorStrategy) Recover(recognizer Parser, e RecognitionException) { + + if d.lastErrorIndex == recognizer.GetInputStream().Index() && + d.lastErrorStates != nil && d.lastErrorStates.contains(recognizer.GetState()) { + // uh oh, another error at same token index and previously-Visited + // state in ATN must be a case where LT(1) is in the recovery + // token set so nothing got consumed. Consume a single token + // at least to prevent an infinite loop d is a failsafe. + recognizer.Consume() + } + d.lastErrorIndex = recognizer.GetInputStream().Index() + if d.lastErrorStates == nil { + d.lastErrorStates = NewIntervalSet() + } + d.lastErrorStates.addOne(recognizer.GetState()) + followSet := d.getErrorRecoverySet(recognizer) + d.consumeUntil(recognizer, followSet) +} + +// The default implementation of {@link ANTLRErrorStrategy//Sync} makes sure +// that the current lookahead symbol is consistent with what were expecting +// at d point in the ATN. You can call d anytime but ANTLR only +// generates code to check before subrules/loops and each iteration. +// +//

Implements Jim Idle's magic Sync mechanism in closures and optional +// subrules. E.g.,

+// +//
+// a : Sync ( stuff Sync )*
+// Sync : {consume to what can follow Sync}
+// 
+// +// At the start of a sub rule upon error, {@link //Sync} performs single +// token deletion, if possible. If it can't do that, it bails on the current +// rule and uses the default error recovery, which consumes until the +// reSynchronization set of the current rule. +// +//

If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block +// with an empty alternative), then the expected set includes what follows +// the subrule.

+// +//

During loop iteration, it consumes until it sees a token that can start a +// sub rule or what follows loop. Yes, that is pretty aggressive. We opt to +// stay in the loop as long as possible.

+// +//

ORIGINS

+// +//

Previous versions of ANTLR did a poor job of their recovery within loops. +// A single mismatch token or missing token would force the parser to bail +// out of the entire rules surrounding the loop. So, for rule

+// +//
+// classfunc : 'class' ID '{' member* '}'
+// 
+// +// input with an extra token between members would force the parser to +// consume until it found the next class definition rather than the next +// member definition of the current class. +// +//

This functionality cost a little bit of effort because the parser has to +// compare token set at the start of the loop and at each iteration. If for +// some reason speed is suffering for you, you can turn off d +// functionality by simply overriding d method as a blank { }.

+func (d *DefaultErrorStrategy) Sync(recognizer Parser) { + // If already recovering, don't try to Sync + if d.InErrorRecoveryMode(recognizer) { + return + } + + s := recognizer.GetInterpreter().atn.states[recognizer.GetState()] + la := recognizer.GetTokenStream().LA(1) + + // try cheaper subset first might get lucky. seems to shave a wee bit off + nextTokens := recognizer.GetATN().NextTokens(s, nil) + if nextTokens.contains(TokenEpsilon) || nextTokens.contains(la) { + return + } + + switch s.GetStateType() { + case ATNStateBlockStart, ATNStateStarBlockStart, ATNStatePlusBlockStart, ATNStateStarLoopEntry: + // Report error and recover if possible + if d.SingleTokenDeletion(recognizer) != nil { + return + } + panic(NewInputMisMatchException(recognizer)) + case ATNStatePlusLoopBack, ATNStateStarLoopBack: + d.ReportUnwantedToken(recognizer) + expecting := NewIntervalSet() + expecting.addSet(recognizer.GetExpectedTokens()) + whatFollowsLoopIterationOrRule := expecting.addSet(d.getErrorRecoverySet(recognizer)) + d.consumeUntil(recognizer, whatFollowsLoopIterationOrRule) + default: + // do nothing if we can't identify the exact kind of ATN state + } +} + +// This is called by {@link //ReportError} when the exception is a +// {@link NoViableAltException}. +// +// @see //ReportError +// +// @param recognizer the parser instance +// @param e the recognition exception +func (d *DefaultErrorStrategy) ReportNoViableAlternative(recognizer Parser, e *NoViableAltException) { + tokens := recognizer.GetTokenStream() + var input string + if tokens != nil { + if e.startToken.GetTokenType() == TokenEOF { + input = "" + } else { + input = tokens.GetTextFromTokens(e.startToken, e.offendingToken) + } + } else { + input = "" + } + msg := "no viable alternative at input " + d.escapeWSAndQuote(input) + recognizer.NotifyErrorListeners(msg, e.offendingToken, e) +} + +// This is called by {@link //ReportError} when the exception is an +// {@link InputMisMatchException}. +// +// @see //ReportError +// +// @param recognizer the parser instance +// @param e the recognition exception +func (this *DefaultErrorStrategy) ReportInputMisMatch(recognizer Parser, e *InputMisMatchException) { + msg := "mismatched input " + this.GetTokenErrorDisplay(e.offendingToken) + + " expecting " + e.getExpectedTokens().StringVerbose(recognizer.GetLiteralNames(), recognizer.GetSymbolicNames(), false) + recognizer.NotifyErrorListeners(msg, e.offendingToken, e) +} + +// This is called by {@link //ReportError} when the exception is a +// {@link FailedPredicateException}. +// +// @see //ReportError +// +// @param recognizer the parser instance +// @param e the recognition exception +func (d *DefaultErrorStrategy) ReportFailedPredicate(recognizer Parser, e *FailedPredicateException) { + ruleName := recognizer.GetRuleNames()[recognizer.GetParserRuleContext().GetRuleIndex()] + msg := "rule " + ruleName + " " + e.message + recognizer.NotifyErrorListeners(msg, e.offendingToken, e) +} + +// This method is called to Report a syntax error which requires the removal +// of a token from the input stream. At the time d method is called, the +// erroneous symbol is current {@code LT(1)} symbol and has not yet been +// removed from the input stream. When d method returns, +// {@code recognizer} is in error recovery mode. +// +//

This method is called when {@link //singleTokenDeletion} identifies +// single-token deletion as a viable recovery strategy for a mismatched +// input error.

+// +//

The default implementation simply returns if the handler is already in +// error recovery mode. Otherwise, it calls {@link //beginErrorCondition} to +// enter error recovery mode, followed by calling +// {@link Parser//NotifyErrorListeners}.

+// +// @param recognizer the parser instance +func (d *DefaultErrorStrategy) ReportUnwantedToken(recognizer Parser) { + if d.InErrorRecoveryMode(recognizer) { + return + } + d.beginErrorCondition(recognizer) + t := recognizer.GetCurrentToken() + tokenName := d.GetTokenErrorDisplay(t) + expecting := d.GetExpectedTokens(recognizer) + msg := "extraneous input " + tokenName + " expecting " + + expecting.StringVerbose(recognizer.GetLiteralNames(), recognizer.GetSymbolicNames(), false) + recognizer.NotifyErrorListeners(msg, t, nil) +} + +// This method is called to Report a syntax error which requires the +// insertion of a missing token into the input stream. At the time d +// method is called, the missing token has not yet been inserted. When d +// method returns, {@code recognizer} is in error recovery mode. +// +//

This method is called when {@link //singleTokenInsertion} identifies +// single-token insertion as a viable recovery strategy for a mismatched +// input error.

+// +//

The default implementation simply returns if the handler is already in +// error recovery mode. Otherwise, it calls {@link //beginErrorCondition} to +// enter error recovery mode, followed by calling +// {@link Parser//NotifyErrorListeners}.

+// +// @param recognizer the parser instance +func (d *DefaultErrorStrategy) ReportMissingToken(recognizer Parser) { + if d.InErrorRecoveryMode(recognizer) { + return + } + d.beginErrorCondition(recognizer) + t := recognizer.GetCurrentToken() + expecting := d.GetExpectedTokens(recognizer) + msg := "missing " + expecting.StringVerbose(recognizer.GetLiteralNames(), recognizer.GetSymbolicNames(), false) + + " at " + d.GetTokenErrorDisplay(t) + recognizer.NotifyErrorListeners(msg, t, nil) +} + +//

The default implementation attempts to recover from the mismatched input +// by using single token insertion and deletion as described below. If the +// recovery attempt fails, d method panics an +// {@link InputMisMatchException}.

+// +//

EXTRA TOKEN (single token deletion)

+// +//

{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the +// right token, however, then assume {@code LA(1)} is some extra spurious +// token and delete it. Then consume and return the next token (which was +// the {@code LA(2)} token) as the successful result of the Match operation.

+// +//

This recovery strategy is implemented by {@link +// //singleTokenDeletion}.

+// +//

MISSING TOKEN (single token insertion)

+// +//

If current token (at {@code LA(1)}) is consistent with what could come +// after the expected {@code LA(1)} token, then assume the token is missing +// and use the parser's {@link TokenFactory} to create it on the fly. The +// "insertion" is performed by returning the created token as the successful +// result of the Match operation.

+// +//

This recovery strategy is implemented by {@link +// //singleTokenInsertion}.

+// +//

EXAMPLE

+// +//

For example, Input {@code i=(3} is clearly missing the {@code ')'}. When +// the parser returns from the nested call to {@code expr}, it will have +// call chain:

+// +//
+// stat &rarr expr &rarr atom
+// 
+// +// and it will be trying to Match the {@code ')'} at d point in the +// derivation: +// +//
+// => ID '=' '(' INT ')' ('+' atom)* ”
+// ^
+// 
+// +// The attempt to Match {@code ')'} will fail when it sees {@code ”} and +// call {@link //recoverInline}. To recover, it sees that {@code LA(1)==”} +// is in the set of tokens that can follow the {@code ')'} token reference +// in rule {@code atom}. It can assume that you forgot the {@code ')'}. +func (d *DefaultErrorStrategy) RecoverInline(recognizer Parser) Token { + // SINGLE TOKEN DELETION + MatchedSymbol := d.SingleTokenDeletion(recognizer) + if MatchedSymbol != nil { + // we have deleted the extra token. + // now, move past ttype token as if all were ok + recognizer.Consume() + return MatchedSymbol + } + // SINGLE TOKEN INSERTION + if d.SingleTokenInsertion(recognizer) { + return d.GetMissingSymbol(recognizer) + } + // even that didn't work must panic the exception + panic(NewInputMisMatchException(recognizer)) +} + +// This method implements the single-token insertion inline error recovery +// strategy. It is called by {@link //recoverInline} if the single-token +// deletion strategy fails to recover from the mismatched input. If this +// method returns {@code true}, {@code recognizer} will be in error recovery +// mode. +// +//

This method determines whether or not single-token insertion is viable by +// checking if the {@code LA(1)} input symbol could be successfully Matched +// if it were instead the {@code LA(2)} symbol. If d method returns +// {@code true}, the caller is responsible for creating and inserting a +// token with the correct type to produce d behavior.

+// +// @param recognizer the parser instance +// @return {@code true} if single-token insertion is a viable recovery +// strategy for the current mismatched input, otherwise {@code false} +func (d *DefaultErrorStrategy) SingleTokenInsertion(recognizer Parser) bool { + currentSymbolType := recognizer.GetTokenStream().LA(1) + // if current token is consistent with what could come after current + // ATN state, then we know we're missing a token error recovery + // is free to conjure up and insert the missing token + atn := recognizer.GetInterpreter().atn + currentState := atn.states[recognizer.GetState()] + next := currentState.GetTransitions()[0].getTarget() + expectingAtLL2 := atn.NextTokens(next, recognizer.GetParserRuleContext()) + if expectingAtLL2.contains(currentSymbolType) { + d.ReportMissingToken(recognizer) + return true + } + + return false +} + +// This method implements the single-token deletion inline error recovery +// strategy. It is called by {@link //recoverInline} to attempt to recover +// from mismatched input. If this method returns nil, the parser and error +// handler state will not have changed. If this method returns non-nil, +// {@code recognizer} will not be in error recovery mode since the +// returned token was a successful Match. +// +//

If the single-token deletion is successful, d method calls +// {@link //ReportUnwantedToken} to Report the error, followed by +// {@link Parser//consume} to actually "delete" the extraneous token. Then, +// before returning {@link //ReportMatch} is called to signal a successful +// Match.

+// +// @param recognizer the parser instance +// @return the successfully Matched {@link Token} instance if single-token +// deletion successfully recovers from the mismatched input, otherwise +// {@code nil} +func (d *DefaultErrorStrategy) SingleTokenDeletion(recognizer Parser) Token { + NextTokenType := recognizer.GetTokenStream().LA(2) + expecting := d.GetExpectedTokens(recognizer) + if expecting.contains(NextTokenType) { + d.ReportUnwantedToken(recognizer) + // print("recoverFromMisMatchedToken deleting " \ + // + str(recognizer.GetTokenStream().LT(1)) \ + // + " since " + str(recognizer.GetTokenStream().LT(2)) \ + // + " is what we want", file=sys.stderr) + recognizer.Consume() // simply delete extra token + // we want to return the token we're actually Matching + MatchedSymbol := recognizer.GetCurrentToken() + d.ReportMatch(recognizer) // we know current token is correct + return MatchedSymbol + } + + return nil +} + +// Conjure up a missing token during error recovery. +// +// The recognizer attempts to recover from single missing +// symbols. But, actions might refer to that missing symbol. +// For example, x=ID {f($x)}. The action clearly assumes +// that there has been an identifier Matched previously and that +// $x points at that token. If that token is missing, but +// the next token in the stream is what we want we assume that +// d token is missing and we keep going. Because we +// have to return some token to replace the missing token, +// we have to conjure one up. This method gives the user control +// over the tokens returned for missing tokens. Mostly, +// you will want to create something special for identifier +// tokens. For literals such as '{' and ',', the default +// action in the parser or tree parser works. It simply creates +// a CommonToken of the appropriate type. The text will be the token. +// If you change what tokens must be created by the lexer, +// override d method to create the appropriate tokens. +func (d *DefaultErrorStrategy) GetMissingSymbol(recognizer Parser) Token { + currentSymbol := recognizer.GetCurrentToken() + expecting := d.GetExpectedTokens(recognizer) + expectedTokenType := expecting.first() + var tokenText string + + if expectedTokenType == TokenEOF { + tokenText = "" + } else { + ln := recognizer.GetLiteralNames() + if expectedTokenType > 0 && expectedTokenType < len(ln) { + tokenText = "" + } else { + tokenText = "" // TODO matches the JS impl + } + } + current := currentSymbol + lookback := recognizer.GetTokenStream().LT(-1) + if current.GetTokenType() == TokenEOF && lookback != nil { + current = lookback + } + + tf := recognizer.GetTokenFactory() + + return tf.Create(current.GetSource(), expectedTokenType, tokenText, TokenDefaultChannel, -1, -1, current.GetLine(), current.GetColumn()) +} + +func (d *DefaultErrorStrategy) GetExpectedTokens(recognizer Parser) *IntervalSet { + return recognizer.GetExpectedTokens() +} + +// How should a token be displayed in an error message? The default +// is to display just the text, but during development you might +// want to have a lot of information spit out. Override in that case +// to use t.String() (which, for CommonToken, dumps everything about +// the token). This is better than forcing you to override a method in +// your token objects because you don't have to go modify your lexer +// so that it creates a NewJava type. +func (d *DefaultErrorStrategy) GetTokenErrorDisplay(t Token) string { + if t == nil { + return "" + } + s := t.GetText() + if s == "" { + if t.GetTokenType() == TokenEOF { + s = "" + } else { + s = "<" + strconv.Itoa(t.GetTokenType()) + ">" + } + } + return d.escapeWSAndQuote(s) +} + +func (d *DefaultErrorStrategy) escapeWSAndQuote(s string) string { + s = strings.Replace(s, "\t", "\\t", -1) + s = strings.Replace(s, "\n", "\\n", -1) + s = strings.Replace(s, "\r", "\\r", -1) + return "'" + s + "'" +} + +// Compute the error recovery set for the current rule. During +// rule invocation, the parser pushes the set of tokens that can +// follow that rule reference on the stack d amounts to +// computing FIRST of what follows the rule reference in the +// enclosing rule. See LinearApproximator.FIRST(). +// This local follow set only includes tokens +// from within the rule i.e., the FIRST computation done by +// ANTLR stops at the end of a rule. +// +// # EXAMPLE +// +// When you find a "no viable alt exception", the input is not +// consistent with any of the alternatives for rule r. The best +// thing to do is to consume tokens until you see something that +// can legally follow a call to r//or* any rule that called r. +// You don't want the exact set of viable next tokens because the +// input might just be missing a token--you might consume the +// rest of the input looking for one of the missing tokens. +// +// Consider grammar: +// +// a : '[' b ']' +// | '(' b ')' +// +// b : c '^' INT +// c : ID +// | INT +// +// At each rule invocation, the set of tokens that could follow +// that rule is pushed on a stack. Here are the various +// context-sensitive follow sets: +// +// FOLLOW(b1_in_a) = FIRST(']') = ']' +// FOLLOW(b2_in_a) = FIRST(')') = ')' +// FOLLOW(c_in_b) = FIRST('^') = '^' +// +// Upon erroneous input "[]", the call chain is +// +// a -> b -> c +// +// and, hence, the follow context stack is: +// +// depth follow set start of rule execution +// 0 a (from main()) +// 1 ']' b +// 2 '^' c +// +// Notice that ')' is not included, because b would have to have +// been called from a different context in rule a for ')' to be +// included. +// +// For error recovery, we cannot consider FOLLOW(c) +// (context-sensitive or otherwise). We need the combined set of +// all context-sensitive FOLLOW sets--the set of all tokens that +// could follow any reference in the call chain. We need to +// reSync to one of those tokens. Note that FOLLOW(c)='^' and if +// we reSync'd to that token, we'd consume until EOF. We need to +// Sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. +// In this case, for input "[]", LA(1) is ']' and in the set, so we would +// not consume anything. After printing an error, rule c would +// return normally. Rule b would not find the required '^' though. +// At this point, it gets a mismatched token error and panics an +// exception (since LA(1) is not in the viable following token +// set). The rule exception handler tries to recover, but finds +// the same recovery set and doesn't consume anything. Rule b +// exits normally returning to rule a. Now it finds the ']' (and +// with the successful Match exits errorRecovery mode). +// +// So, you can see that the parser walks up the call chain looking +// for the token that was a member of the recovery set. +// +// Errors are not generated in errorRecovery mode. +// +// ANTLR's error recovery mechanism is based upon original ideas: +// +// "Algorithms + Data Structures = Programs" by Niklaus Wirth +// +// and +// +// "A note on error recovery in recursive descent parsers": +// http://portal.acm.org/citation.cfm?id=947902.947905 +// +// Later, Josef Grosch had some good ideas: +// +// "Efficient and Comfortable Error Recovery in Recursive Descent +// Parsers": +// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip +// +// Like Grosch I implement context-sensitive FOLLOW sets that are combined +// at run-time upon error to avoid overhead during parsing. +func (d *DefaultErrorStrategy) getErrorRecoverySet(recognizer Parser) *IntervalSet { + atn := recognizer.GetInterpreter().atn + ctx := recognizer.GetParserRuleContext() + recoverSet := NewIntervalSet() + for ctx != nil && ctx.GetInvokingState() >= 0 { + // compute what follows who invoked us + invokingState := atn.states[ctx.GetInvokingState()] + rt := invokingState.GetTransitions()[0] + follow := atn.NextTokens(rt.(*RuleTransition).followState, nil) + recoverSet.addSet(follow) + ctx = ctx.GetParent().(ParserRuleContext) + } + recoverSet.removeOne(TokenEpsilon) + return recoverSet +} + +// Consume tokens until one Matches the given token set.// +func (d *DefaultErrorStrategy) consumeUntil(recognizer Parser, set *IntervalSet) { + ttype := recognizer.GetTokenStream().LA(1) + for ttype != TokenEOF && !set.contains(ttype) { + recognizer.Consume() + ttype = recognizer.GetTokenStream().LA(1) + } +} + +// +// This implementation of {@link ANTLRErrorStrategy} responds to syntax errors +// by immediately canceling the parse operation with a +// {@link ParseCancellationException}. The implementation ensures that the +// {@link ParserRuleContext//exception} field is set for all parse tree nodes +// that were not completed prior to encountering the error. +// +//

+// This error strategy is useful in the following scenarios.

+// +//
    +//
  • Two-stage parsing: This error strategy allows the first +// stage of two-stage parsing to immediately terminate if an error is +// encountered, and immediately fall back to the second stage. In addition to +// avoiding wasted work by attempting to recover from errors here, the empty +// implementation of {@link BailErrorStrategy//Sync} improves the performance of +// the first stage.
  • +//
  • Silent validation: When syntax errors are not being +// Reported or logged, and the parse result is simply ignored if errors occur, +// the {@link BailErrorStrategy} avoids wasting work on recovering from errors +// when the result will be ignored either way.
  • +//
+// +//

+// {@code myparser.setErrorHandler(NewBailErrorStrategy())}

+// +// @see Parser//setErrorHandler(ANTLRErrorStrategy) + +type BailErrorStrategy struct { + *DefaultErrorStrategy +} + +var _ ErrorStrategy = &BailErrorStrategy{} + +func NewBailErrorStrategy() *BailErrorStrategy { + + b := new(BailErrorStrategy) + + b.DefaultErrorStrategy = NewDefaultErrorStrategy() + + return b +} + +// Instead of recovering from exception {@code e}, re-panic it wrapped +// in a {@link ParseCancellationException} so it is not caught by the +// rule func catches. Use {@link Exception//getCause()} to get the +// original {@link RecognitionException}. +func (b *BailErrorStrategy) Recover(recognizer Parser, e RecognitionException) { + context := recognizer.GetParserRuleContext() + for context != nil { + context.SetException(e) + if parent, ok := context.GetParent().(ParserRuleContext); ok { + context = parent + } else { + context = nil + } + } + panic(NewParseCancellationException()) // TODO we don't emit e properly +} + +// Make sure we don't attempt to recover inline if the parser +// successfully recovers, it won't panic an exception. +func (b *BailErrorStrategy) RecoverInline(recognizer Parser) Token { + b.Recover(recognizer, NewInputMisMatchException(recognizer)) + + return nil +} + +// Make sure we don't attempt to recover from problems in subrules.// +func (b *BailErrorStrategy) Sync(recognizer Parser) { + // pass +} diff --git a/runtime/Go/antlr/v4/errors.go b/runtime/Go/antlr/v4/errors.go new file mode 100644 index 0000000000..3954c13782 --- /dev/null +++ b/runtime/Go/antlr/v4/errors.go @@ -0,0 +1,238 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +// The root of the ANTLR exception hierarchy. In general, ANTLR tracks just +// 3 kinds of errors: prediction errors, failed predicate errors, and +// mismatched input errors. In each case, the parser knows where it is +// in the input, where it is in the ATN, the rule invocation stack, +// and what kind of problem occurred. + +type RecognitionException interface { + GetOffendingToken() Token + GetMessage() string + GetInputStream() IntStream +} + +type BaseRecognitionException struct { + message string + recognizer Recognizer + offendingToken Token + offendingState int + ctx RuleContext + input IntStream +} + +func NewBaseRecognitionException(message string, recognizer Recognizer, input IntStream, ctx RuleContext) *BaseRecognitionException { + + // todo + // Error.call(this) + // + // if (!!Error.captureStackTrace) { + // Error.captureStackTrace(this, RecognitionException) + // } else { + // stack := NewError().stack + // } + // TODO may be able to use - "runtime" func Stack(buf []byte, all bool) int + + t := new(BaseRecognitionException) + + t.message = message + t.recognizer = recognizer + t.input = input + t.ctx = ctx + // The current {@link Token} when an error occurred. Since not all streams + // support accessing symbols by index, we have to track the {@link Token} + // instance itself. + t.offendingToken = nil + // Get the ATN state number the parser was in at the time the error + // occurred. For {@link NoViableAltException} and + // {@link LexerNoViableAltException} exceptions, this is the + // {@link DecisionState} number. For others, it is the state whose outgoing + // edge we couldn't Match. + t.offendingState = -1 + if t.recognizer != nil { + t.offendingState = t.recognizer.GetState() + } + + return t +} + +func (b *BaseRecognitionException) GetMessage() string { + return b.message +} + +func (b *BaseRecognitionException) GetOffendingToken() Token { + return b.offendingToken +} + +func (b *BaseRecognitionException) GetInputStream() IntStream { + return b.input +} + +//

If the state number is not known, b method returns -1.

+ +// Gets the set of input symbols which could potentially follow the +// previously Matched symbol at the time b exception was panicn. +// +//

If the set of expected tokens is not known and could not be computed, +// b method returns {@code nil}.

+// +// @return The set of token types that could potentially follow the current +// state in the ATN, or {@code nil} if the information is not available. +// / +func (b *BaseRecognitionException) getExpectedTokens() *IntervalSet { + if b.recognizer != nil { + return b.recognizer.GetATN().getExpectedTokens(b.offendingState, b.ctx) + } + + return nil +} + +func (b *BaseRecognitionException) String() string { + return b.message +} + +type LexerNoViableAltException struct { + *BaseRecognitionException + + startIndex int + deadEndConfigs ATNConfigSet +} + +func NewLexerNoViableAltException(lexer Lexer, input CharStream, startIndex int, deadEndConfigs ATNConfigSet) *LexerNoViableAltException { + + l := new(LexerNoViableAltException) + + l.BaseRecognitionException = NewBaseRecognitionException("", lexer, input, nil) + + l.startIndex = startIndex + l.deadEndConfigs = deadEndConfigs + + return l +} + +func (l *LexerNoViableAltException) String() string { + symbol := "" + if l.startIndex >= 0 && l.startIndex < l.input.Size() { + symbol = l.input.(CharStream).GetTextFromInterval(NewInterval(l.startIndex, l.startIndex)) + } + return "LexerNoViableAltException" + symbol +} + +type NoViableAltException struct { + *BaseRecognitionException + + startToken Token + offendingToken Token + ctx ParserRuleContext + deadEndConfigs ATNConfigSet +} + +// Indicates that the parser could not decide which of two or more paths +// to take based upon the remaining input. It tracks the starting token +// of the offending input and also knows where the parser was +// in the various paths when the error. Reported by ReportNoViableAlternative() +func NewNoViableAltException(recognizer Parser, input TokenStream, startToken Token, offendingToken Token, deadEndConfigs ATNConfigSet, ctx ParserRuleContext) *NoViableAltException { + + if ctx == nil { + ctx = recognizer.GetParserRuleContext() + } + + if offendingToken == nil { + offendingToken = recognizer.GetCurrentToken() + } + + if startToken == nil { + startToken = recognizer.GetCurrentToken() + } + + if input == nil { + input = recognizer.GetInputStream().(TokenStream) + } + + n := new(NoViableAltException) + n.BaseRecognitionException = NewBaseRecognitionException("", recognizer, input, ctx) + + // Which configurations did we try at input.Index() that couldn't Match + // input.LT(1)?// + n.deadEndConfigs = deadEndConfigs + // The token object at the start index the input stream might + // not be buffering tokens so get a reference to it. (At the + // time the error occurred, of course the stream needs to keep a + // buffer all of the tokens but later we might not have access to those.) + n.startToken = startToken + n.offendingToken = offendingToken + + return n +} + +type InputMisMatchException struct { + *BaseRecognitionException +} + +// This signifies any kind of mismatched input exceptions such as +// when the current input does not Match the expected token. +func NewInputMisMatchException(recognizer Parser) *InputMisMatchException { + + i := new(InputMisMatchException) + i.BaseRecognitionException = NewBaseRecognitionException("", recognizer, recognizer.GetInputStream(), recognizer.GetParserRuleContext()) + + i.offendingToken = recognizer.GetCurrentToken() + + return i + +} + +// A semantic predicate failed during validation. Validation of predicates +// occurs when normally parsing the alternative just like Matching a token. +// Disambiguating predicate evaluation occurs when we test a predicate during +// prediction. + +type FailedPredicateException struct { + *BaseRecognitionException + + ruleIndex int + predicateIndex int + predicate string +} + +func NewFailedPredicateException(recognizer Parser, predicate string, message string) *FailedPredicateException { + + f := new(FailedPredicateException) + + f.BaseRecognitionException = NewBaseRecognitionException(f.formatMessage(predicate, message), recognizer, recognizer.GetInputStream(), recognizer.GetParserRuleContext()) + + s := recognizer.GetInterpreter().atn.states[recognizer.GetState()] + trans := s.GetTransitions()[0] + if trans2, ok := trans.(*PredicateTransition); ok { + f.ruleIndex = trans2.ruleIndex + f.predicateIndex = trans2.predIndex + } else { + f.ruleIndex = 0 + f.predicateIndex = 0 + } + f.predicate = predicate + f.offendingToken = recognizer.GetCurrentToken() + + return f +} + +func (f *FailedPredicateException) formatMessage(predicate, message string) string { + if message != "" { + return message + } + + return "failed predicate: {" + predicate + "}?" +} + +type ParseCancellationException struct { +} + +func NewParseCancellationException() *ParseCancellationException { + // Error.call(this) + // Error.captureStackTrace(this, ParseCancellationException) + return new(ParseCancellationException) +} diff --git a/runtime/Go/antlr/v4/file_stream.go b/runtime/Go/antlr/v4/file_stream.go new file mode 100644 index 0000000000..bd6ad5efe3 --- /dev/null +++ b/runtime/Go/antlr/v4/file_stream.go @@ -0,0 +1,49 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "bytes" + "io" + "os" +) + +// This is an InputStream that is loaded from a file all at once +// when you construct the object. + +type FileStream struct { + *InputStream + + filename string +} + +func NewFileStream(fileName string) (*FileStream, error) { + + buf := bytes.NewBuffer(nil) + + f, err := os.Open(fileName) + if err != nil { + return nil, err + } + defer f.Close() + _, err = io.Copy(buf, f) + if err != nil { + return nil, err + } + + fs := new(FileStream) + + fs.filename = fileName + s := string(buf.Bytes()) + + fs.InputStream = NewInputStream(s) + + return fs, nil + +} + +func (f *FileStream) GetSourceName() string { + return f.filename +} diff --git a/runtime/Go/antlr/v4/go.mod b/runtime/Go/antlr/v4/go.mod new file mode 100644 index 0000000000..0d8feb6214 --- /dev/null +++ b/runtime/Go/antlr/v4/go.mod @@ -0,0 +1,5 @@ +module github.com/antlr/antlr4/runtime/Go/antlr/v4 + +go 1.18 + +require golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e diff --git a/runtime/Go/antlr/v4/input_stream.go b/runtime/Go/antlr/v4/input_stream.go new file mode 100644 index 0000000000..a8b889cedb --- /dev/null +++ b/runtime/Go/antlr/v4/input_stream.go @@ -0,0 +1,113 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type InputStream struct { + name string + index int + data []rune + size int +} + +func NewInputStream(data string) *InputStream { + + is := new(InputStream) + + is.name = "" + is.index = 0 + is.data = []rune(data) + is.size = len(is.data) // number of runes + + return is +} + +func (is *InputStream) reset() { + is.index = 0 +} + +func (is *InputStream) Consume() { + if is.index >= is.size { + // assert is.LA(1) == TokenEOF + panic("cannot consume EOF") + } + is.index++ +} + +func (is *InputStream) LA(offset int) int { + + if offset == 0 { + return 0 // nil + } + if offset < 0 { + offset++ // e.g., translate LA(-1) to use offset=0 + } + pos := is.index + offset - 1 + + if pos < 0 || pos >= is.size { // invalid + return TokenEOF + } + + return int(is.data[pos]) +} + +func (is *InputStream) LT(offset int) int { + return is.LA(offset) +} + +func (is *InputStream) Index() int { + return is.index +} + +func (is *InputStream) Size() int { + return is.size +} + +// mark/release do nothing we have entire buffer +func (is *InputStream) Mark() int { + return -1 +} + +func (is *InputStream) Release(marker int) { +} + +func (is *InputStream) Seek(index int) { + if index <= is.index { + is.index = index // just jump don't update stream state (line,...) + return + } + // seek forward + is.index = intMin(index, is.size) +} + +func (is *InputStream) GetText(start int, stop int) string { + if stop >= is.size { + stop = is.size - 1 + } + if start >= is.size { + return "" + } + + return string(is.data[start : stop+1]) +} + +func (is *InputStream) GetTextFromTokens(start, stop Token) string { + if start != nil && stop != nil { + return is.GetTextFromInterval(NewInterval(start.GetTokenIndex(), stop.GetTokenIndex())) + } + + return "" +} + +func (is *InputStream) GetTextFromInterval(i *Interval) string { + return is.GetText(i.Start, i.Stop) +} + +func (*InputStream) GetSourceName() string { + return "Obtained from string" +} + +func (is *InputStream) String() string { + return string(is.data) +} diff --git a/runtime/Go/antlr/v4/int_stream.go b/runtime/Go/antlr/v4/int_stream.go new file mode 100644 index 0000000000..4778878bd0 --- /dev/null +++ b/runtime/Go/antlr/v4/int_stream.go @@ -0,0 +1,16 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type IntStream interface { + Consume() + LA(int) int + Mark() int + Release(marker int) + Index() int + Seek(index int) + Size() int + GetSourceName() string +} diff --git a/runtime/Go/antlr/v4/interval_set.go b/runtime/Go/antlr/v4/interval_set.go new file mode 100644 index 0000000000..c1e155e818 --- /dev/null +++ b/runtime/Go/antlr/v4/interval_set.go @@ -0,0 +1,312 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "strconv" + "strings" +) + +type Interval struct { + Start int + Stop int +} + +/* stop is not included! */ +func NewInterval(start, stop int) *Interval { + i := new(Interval) + + i.Start = start + i.Stop = stop + return i +} + +func (i *Interval) Contains(item int) bool { + return item >= i.Start && item < i.Stop +} + +func (i *Interval) String() string { + if i.Start == i.Stop-1 { + return strconv.Itoa(i.Start) + } + + return strconv.Itoa(i.Start) + ".." + strconv.Itoa(i.Stop-1) +} + +func (i *Interval) length() int { + return i.Stop - i.Start +} + +type IntervalSet struct { + intervals []*Interval + readOnly bool +} + +func NewIntervalSet() *IntervalSet { + + i := new(IntervalSet) + + i.intervals = nil + i.readOnly = false + + return i +} + +func (i *IntervalSet) first() int { + if len(i.intervals) == 0 { + return TokenInvalidType + } + + return i.intervals[0].Start +} + +func (i *IntervalSet) addOne(v int) { + i.addInterval(NewInterval(v, v+1)) +} + +func (i *IntervalSet) addRange(l, h int) { + i.addInterval(NewInterval(l, h+1)) +} + +func (i *IntervalSet) addInterval(v *Interval) { + if i.intervals == nil { + i.intervals = make([]*Interval, 0) + i.intervals = append(i.intervals, v) + } else { + // find insert pos + for k, interval := range i.intervals { + // distinct range -> insert + if v.Stop < interval.Start { + i.intervals = append(i.intervals[0:k], append([]*Interval{v}, i.intervals[k:]...)...) + return + } else if v.Stop == interval.Start { + i.intervals[k].Start = v.Start + return + } else if v.Start <= interval.Stop { + i.intervals[k] = NewInterval(intMin(interval.Start, v.Start), intMax(interval.Stop, v.Stop)) + + // if not applying to end, merge potential overlaps + if k < len(i.intervals)-1 { + l := i.intervals[k] + r := i.intervals[k+1] + // if r contained in l + if l.Stop >= r.Stop { + i.intervals = append(i.intervals[0:k+1], i.intervals[k+2:]...) + } else if l.Stop >= r.Start { // partial overlap + i.intervals[k] = NewInterval(l.Start, r.Stop) + i.intervals = append(i.intervals[0:k+1], i.intervals[k+2:]...) + } + } + return + } + } + // greater than any exiting + i.intervals = append(i.intervals, v) + } +} + +func (i *IntervalSet) addSet(other *IntervalSet) *IntervalSet { + if other.intervals != nil { + for k := 0; k < len(other.intervals); k++ { + i2 := other.intervals[k] + i.addInterval(NewInterval(i2.Start, i2.Stop)) + } + } + return i +} + +func (i *IntervalSet) complement(start int, stop int) *IntervalSet { + result := NewIntervalSet() + result.addInterval(NewInterval(start, stop+1)) + for j := 0; j < len(i.intervals); j++ { + result.removeRange(i.intervals[j]) + } + return result +} + +func (i *IntervalSet) contains(item int) bool { + if i.intervals == nil { + return false + } + for k := 0; k < len(i.intervals); k++ { + if i.intervals[k].Contains(item) { + return true + } + } + return false +} + +func (i *IntervalSet) length() int { + len := 0 + + for _, v := range i.intervals { + len += v.length() + } + + return len +} + +func (i *IntervalSet) removeRange(v *Interval) { + if v.Start == v.Stop-1 { + i.removeOne(v.Start) + } else if i.intervals != nil { + k := 0 + for n := 0; n < len(i.intervals); n++ { + ni := i.intervals[k] + // intervals are ordered + if v.Stop <= ni.Start { + return + } else if v.Start > ni.Start && v.Stop < ni.Stop { + i.intervals[k] = NewInterval(ni.Start, v.Start) + x := NewInterval(v.Stop, ni.Stop) + // i.intervals.splice(k, 0, x) + i.intervals = append(i.intervals[0:k], append([]*Interval{x}, i.intervals[k:]...)...) + return + } else if v.Start <= ni.Start && v.Stop >= ni.Stop { + // i.intervals.splice(k, 1) + i.intervals = append(i.intervals[0:k], i.intervals[k+1:]...) + k = k - 1 // need another pass + } else if v.Start < ni.Stop { + i.intervals[k] = NewInterval(ni.Start, v.Start) + } else if v.Stop < ni.Stop { + i.intervals[k] = NewInterval(v.Stop, ni.Stop) + } + k++ + } + } +} + +func (i *IntervalSet) removeOne(v int) { + if i.intervals != nil { + for k := 0; k < len(i.intervals); k++ { + ki := i.intervals[k] + // intervals i ordered + if v < ki.Start { + return + } else if v == ki.Start && v == ki.Stop-1 { + // i.intervals.splice(k, 1) + i.intervals = append(i.intervals[0:k], i.intervals[k+1:]...) + return + } else if v == ki.Start { + i.intervals[k] = NewInterval(ki.Start+1, ki.Stop) + return + } else if v == ki.Stop-1 { + i.intervals[k] = NewInterval(ki.Start, ki.Stop-1) + return + } else if v < ki.Stop-1 { + x := NewInterval(ki.Start, v) + ki.Start = v + 1 + // i.intervals.splice(k, 0, x) + i.intervals = append(i.intervals[0:k], append([]*Interval{x}, i.intervals[k:]...)...) + return + } + } + } +} + +func (i *IntervalSet) String() string { + return i.StringVerbose(nil, nil, false) +} + +func (i *IntervalSet) StringVerbose(literalNames []string, symbolicNames []string, elemsAreChar bool) string { + + if i.intervals == nil { + return "{}" + } else if literalNames != nil || symbolicNames != nil { + return i.toTokenString(literalNames, symbolicNames) + } else if elemsAreChar { + return i.toCharString() + } + + return i.toIndexString() +} + +func (i *IntervalSet) GetIntervals() []*Interval { + return i.intervals +} + +func (i *IntervalSet) toCharString() string { + names := make([]string, len(i.intervals)) + + var sb strings.Builder + + for j := 0; j < len(i.intervals); j++ { + v := i.intervals[j] + if v.Stop == v.Start+1 { + if v.Start == TokenEOF { + names = append(names, "") + } else { + sb.WriteByte('\'') + sb.WriteRune(rune(v.Start)) + sb.WriteByte('\'') + names = append(names, sb.String()) + sb.Reset() + } + } else { + sb.WriteByte('\'') + sb.WriteRune(rune(v.Start)) + sb.WriteString("'..'") + sb.WriteRune(rune(v.Stop - 1)) + sb.WriteByte('\'') + names = append(names, sb.String()) + sb.Reset() + } + } + if len(names) > 1 { + return "{" + strings.Join(names, ", ") + "}" + } + + return names[0] +} + +func (i *IntervalSet) toIndexString() string { + + names := make([]string, 0) + for j := 0; j < len(i.intervals); j++ { + v := i.intervals[j] + if v.Stop == v.Start+1 { + if v.Start == TokenEOF { + names = append(names, "") + } else { + names = append(names, strconv.Itoa(v.Start)) + } + } else { + names = append(names, strconv.Itoa(v.Start)+".."+strconv.Itoa(v.Stop-1)) + } + } + if len(names) > 1 { + return "{" + strings.Join(names, ", ") + "}" + } + + return names[0] +} + +func (i *IntervalSet) toTokenString(literalNames []string, symbolicNames []string) string { + names := make([]string, 0) + for _, v := range i.intervals { + for j := v.Start; j < v.Stop; j++ { + names = append(names, i.elementName(literalNames, symbolicNames, j)) + } + } + if len(names) > 1 { + return "{" + strings.Join(names, ", ") + "}" + } + + return names[0] +} + +func (i *IntervalSet) elementName(literalNames []string, symbolicNames []string, a int) string { + if a == TokenEOF { + return "" + } else if a == TokenEpsilon { + return "" + } else { + if a < len(literalNames) && literalNames[a] != "" { + return literalNames[a] + } + + return symbolicNames[a] + } +} diff --git a/runtime/Go/antlr/v4/jcollect.go b/runtime/Go/antlr/v4/jcollect.go new file mode 100644 index 0000000000..8fb01c5bd9 --- /dev/null +++ b/runtime/Go/antlr/v4/jcollect.go @@ -0,0 +1,195 @@ +package antlr + +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +import "sort" + +// Collectable is an interface that a struct should implement if it is to be +// usable as a key in these collections. +type Collectable[T any] interface { + Hash() int + Equals(other Collectable[T]) bool +} + +type Comparator[T any] interface { + Hash1(o T) int + Equals2(T, T) bool +} + +// JStore implements a container that allows the use of a struct to calculate the key +// for a collection of values akin to map. This is not meant to be a full-blown HashMap but just +// serve the needs of the ANTLR Go runtime. +// +// For ease of porting the logic of the runtime from the master target (Java), this collection +// operates in a similar way to Java, in that it can use any struct that supplies a Hash() and Equals() +// function as the key. The values are stored in a standard go map which internally is a form of hashmap +// itself, the key for the go map is the hash supplied by the key object. The collection is able to deal with +// hash conflicts by using a simple slice of values associated with the hash code indexed bucket. That isn't +// particularly efficient, but it is simple, and it works. As this is specifically for the ANTLR runtime, and +// we understand the requirements, then this is fine - this is not a general purpose collection. +type JStore[T any, C Comparator[T]] struct { + store map[int][]T + len int + comparator Comparator[T] +} + +func NewJStore[T any, C Comparator[T]](comparator Comparator[T]) *JStore[T, C] { + + if comparator == nil { + panic("comparator cannot be nil") + } + + s := &JStore[T, C]{ + store: make(map[int][]T, 1), + comparator: comparator, + } + return s +} + +// Put will store given value in the collection. Note that the key for storage is generated from +// the value itself - this is specifically because that is what ANTLR needs - this would not be useful +// as any kind of general collection. +// +// If the key has a hash conflict, then the value will be added to the slice of values associated with the +// hash, unless the value is already in the slice, in which case the existing value is returned. Value equivalence is +// tested by calling the equals() method on the key. +// +// # If the given value is already present in the store, then the existing value is returned as v and exists is set to true +// +// If the given value is not present in the store, then the value is added to the store and returned as v and exists is set to false. +func (s *JStore[T, C]) Put(value T) (v T, exists bool) { //nolint:ireturn + + kh := s.comparator.Hash1(value) + + for _, v1 := range s.store[kh] { + if s.comparator.Equals2(value, v1) { + return v1, true + } + } + s.store[kh] = append(s.store[kh], value) + s.len++ + return value, false +} + +// Get will return the value associated with the key - the type of the key is the same type as the value +// which would not generally be useful, but this is a specific thing for ANTLR where the key is +// generated using the object we are going to store. +func (s *JStore[T, C]) Get(key T) (T, bool) { //nolint:ireturn + + kh := s.comparator.Hash1(key) + + for _, v := range s.store[kh] { + if s.comparator.Equals2(key, v) { + return v, true + } + } + return key, false +} + +// Contains returns true if the given key is present in the store +func (s *JStore[T, C]) Contains(key T) bool { //nolint:ireturn + + _, present := s.Get(key) + return present +} + +func (s *JStore[T, C]) SortedSlice(less func(i, j T) bool) []T { + vs := make([]T, 0, len(s.store)) + for _, v := range s.store { + vs = append(vs, v...) + } + sort.Slice(vs, func(i, j int) bool { + return less(vs[i], vs[j]) + }) + + return vs +} + +func (s *JStore[T, C]) Each(f func(T) bool) { + for _, e := range s.store { + for _, v := range e { + f(v) + } + } +} + +func (s *JStore[T, C]) Len() int { + return s.len +} + +func (s *JStore[T, C]) Values() []T { + vs := make([]T, 0, len(s.store)) + for _, e := range s.store { + for _, v := range e { + vs = append(vs, v) + } + } + return vs +} + +type entry[K, V any] struct { + key K + val V +} + +type JMap[K, V any, C Comparator[K]] struct { + store map[int][]*entry[K, V] + len int + comparator Comparator[K] +} + +func NewJMap[K, V any, C Comparator[K]](comparator Comparator[K]) *JMap[K, V, C] { + return &JMap[K, V, C]{ + store: make(map[int][]*entry[K, V], 1), + comparator: comparator, + } +} + +func (m *JMap[K, V, C]) Put(key K, val V) { + kh := m.comparator.Hash1(key) + m.store[kh] = append(m.store[kh], &entry[K, V]{key, val}) + m.len++ +} + +func (m *JMap[K, V, C]) Values() []V { + vs := make([]V, 0, len(m.store)) + for _, e := range m.store { + for _, v := range e { + vs = append(vs, v.val) + } + } + return vs +} + +func (m *JMap[K, V, C]) Get(key K) (V, bool) { + + var none V + kh := m.comparator.Hash1(key) + for _, e := range m.store[kh] { + if m.comparator.Equals2(e.key, key) { + return e.val, true + } + } + return none, false +} + +func (m *JMap[K, V, C]) Len() int { + return len(m.store) +} + +func (m *JMap[K, V, C]) Delete(key K) { + kh := m.comparator.Hash1(key) + for i, e := range m.store[kh] { + if m.comparator.Equals2(e.key, key) { + m.store[kh] = append(m.store[kh][:i], m.store[kh][i+1:]...) + m.len-- + return + } + } +} + +func (m *JMap[K, V, C]) Clear() { + m.store = make(map[int][]*entry[K, V]) +} diff --git a/runtime/Go/antlr/v4/jcollect_test.go b/runtime/Go/antlr/v4/jcollect_test.go new file mode 100644 index 0000000000..816307a02c --- /dev/null +++ b/runtime/Go/antlr/v4/jcollect_test.go @@ -0,0 +1,15 @@ +package antlr + +import "testing" + +func Test_try(t *testing.T) { + tests := []struct { + name string + }{ + {"Test_try"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + }) + } +} diff --git a/runtime/Go/antlr/v4/lexer.go b/runtime/Go/antlr/v4/lexer.go new file mode 100644 index 0000000000..6533f05164 --- /dev/null +++ b/runtime/Go/antlr/v4/lexer.go @@ -0,0 +1,416 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" +) + +// A lexer is recognizer that draws input symbols from a character stream. +// lexer grammars result in a subclass of this object. A Lexer object +// uses simplified Match() and error recovery mechanisms in the interest +// of speed. +/// + +type Lexer interface { + TokenSource + Recognizer + + Emit() Token + + SetChannel(int) + PushMode(int) + PopMode() int + SetType(int) + SetMode(int) +} + +type BaseLexer struct { + *BaseRecognizer + + Interpreter ILexerATNSimulator + TokenStartCharIndex int + TokenStartLine int + TokenStartColumn int + ActionType int + Virt Lexer // The most derived lexer implementation. Allows virtual method calls. + + input CharStream + factory TokenFactory + tokenFactorySourcePair *TokenSourceCharStreamPair + token Token + hitEOF bool + channel int + thetype int + modeStack IntStack + mode int + text string +} + +func NewBaseLexer(input CharStream) *BaseLexer { + + lexer := new(BaseLexer) + + lexer.BaseRecognizer = NewBaseRecognizer() + + lexer.input = input + lexer.factory = CommonTokenFactoryDEFAULT + lexer.tokenFactorySourcePair = &TokenSourceCharStreamPair{lexer, input} + + lexer.Virt = lexer + + lexer.Interpreter = nil // child classes must populate it + + // The goal of all lexer rules/methods is to create a token object. + // l is an instance variable as multiple rules may collaborate to + // create a single token. NextToken will return l object after + // Matching lexer rule(s). If you subclass to allow multiple token + // emissions, then set l to the last token to be Matched or + // something nonnil so that the auto token emit mechanism will not + // emit another token. + lexer.token = nil + + // What character index in the stream did the current token start at? + // Needed, for example, to get the text for current token. Set at + // the start of NextToken. + lexer.TokenStartCharIndex = -1 + + // The line on which the first character of the token resides/// + lexer.TokenStartLine = -1 + + // The character position of first character within the line/// + lexer.TokenStartColumn = -1 + + // Once we see EOF on char stream, next token will be EOF. + // If you have DONE : EOF then you see DONE EOF. + lexer.hitEOF = false + + // The channel number for the current token/// + lexer.channel = TokenDefaultChannel + + // The token type for the current token/// + lexer.thetype = TokenInvalidType + + lexer.modeStack = make([]int, 0) + lexer.mode = LexerDefaultMode + + // You can set the text for the current token to override what is in + // the input char buffer. Use setText() or can set l instance var. + // / + lexer.text = "" + + return lexer +} + +const ( + LexerDefaultMode = 0 + LexerMore = -2 + LexerSkip = -3 +) + +const ( + LexerDefaultTokenChannel = TokenDefaultChannel + LexerHidden = TokenHiddenChannel + LexerMinCharValue = 0x0000 + LexerMaxCharValue = 0x10FFFF +) + +func (b *BaseLexer) reset() { + // wack Lexer state variables + if b.input != nil { + b.input.Seek(0) // rewind the input + } + b.token = nil + b.thetype = TokenInvalidType + b.channel = TokenDefaultChannel + b.TokenStartCharIndex = -1 + b.TokenStartColumn = -1 + b.TokenStartLine = -1 + b.text = "" + + b.hitEOF = false + b.mode = LexerDefaultMode + b.modeStack = make([]int, 0) + + b.Interpreter.reset() +} + +func (b *BaseLexer) GetInterpreter() ILexerATNSimulator { + return b.Interpreter +} + +func (b *BaseLexer) GetInputStream() CharStream { + return b.input +} + +func (b *BaseLexer) GetSourceName() string { + return b.GrammarFileName +} + +func (b *BaseLexer) SetChannel(v int) { + b.channel = v +} + +func (b *BaseLexer) GetTokenFactory() TokenFactory { + return b.factory +} + +func (b *BaseLexer) setTokenFactory(f TokenFactory) { + b.factory = f +} + +func (b *BaseLexer) safeMatch() (ret int) { + defer func() { + if e := recover(); e != nil { + if re, ok := e.(RecognitionException); ok { + b.notifyListeners(re) // Report error + b.Recover(re) + ret = LexerSkip // default + } + } + }() + + return b.Interpreter.Match(b.input, b.mode) +} + +// Return a token from l source i.e., Match a token on the char stream. +func (b *BaseLexer) NextToken() Token { + if b.input == nil { + panic("NextToken requires a non-nil input stream.") + } + + tokenStartMarker := b.input.Mark() + + // previously in finally block + defer func() { + // make sure we release marker after Match or + // unbuffered char stream will keep buffering + b.input.Release(tokenStartMarker) + }() + + for { + if b.hitEOF { + b.EmitEOF() + return b.token + } + b.token = nil + b.channel = TokenDefaultChannel + b.TokenStartCharIndex = b.input.Index() + b.TokenStartColumn = b.Interpreter.GetCharPositionInLine() + b.TokenStartLine = b.Interpreter.GetLine() + b.text = "" + continueOuter := false + for { + b.thetype = TokenInvalidType + ttype := LexerSkip + + ttype = b.safeMatch() + + if b.input.LA(1) == TokenEOF { + b.hitEOF = true + } + if b.thetype == TokenInvalidType { + b.thetype = ttype + } + if b.thetype == LexerSkip { + continueOuter = true + break + } + if b.thetype != LexerMore { + break + } + } + + if continueOuter { + continue + } + if b.token == nil { + b.Virt.Emit() + } + return b.token + } +} + +// Instruct the lexer to Skip creating a token for current lexer rule +// and look for another token. NextToken() knows to keep looking when +// a lexer rule finishes with token set to SKIPTOKEN. Recall that +// if token==nil at end of any token rule, it creates one for you +// and emits it. +// / +func (b *BaseLexer) Skip() { + b.thetype = LexerSkip +} + +func (b *BaseLexer) More() { + b.thetype = LexerMore +} + +func (b *BaseLexer) SetMode(m int) { + b.mode = m +} + +func (b *BaseLexer) PushMode(m int) { + if LexerATNSimulatorDebug { + fmt.Println("pushMode " + strconv.Itoa(m)) + } + b.modeStack.Push(b.mode) + b.mode = m +} + +func (b *BaseLexer) PopMode() int { + if len(b.modeStack) == 0 { + panic("Empty Stack") + } + if LexerATNSimulatorDebug { + fmt.Println("popMode back to " + fmt.Sprint(b.modeStack[0:len(b.modeStack)-1])) + } + i, _ := b.modeStack.Pop() + b.mode = i + return b.mode +} + +func (b *BaseLexer) inputStream() CharStream { + return b.input +} + +// SetInputStream resets the lexer input stream and associated lexer state. +func (b *BaseLexer) SetInputStream(input CharStream) { + b.input = nil + b.tokenFactorySourcePair = &TokenSourceCharStreamPair{b, b.input} + b.reset() + b.input = input + b.tokenFactorySourcePair = &TokenSourceCharStreamPair{b, b.input} +} + +func (b *BaseLexer) GetTokenSourceCharStreamPair() *TokenSourceCharStreamPair { + return b.tokenFactorySourcePair +} + +// By default does not support multiple emits per NextToken invocation +// for efficiency reasons. Subclass and override l method, NextToken, +// and GetToken (to push tokens into a list and pull from that list +// rather than a single variable as l implementation does). +// / +func (b *BaseLexer) EmitToken(token Token) { + b.token = token +} + +// The standard method called to automatically emit a token at the +// outermost lexical rule. The token object should point into the +// char buffer start..stop. If there is a text override in 'text', +// use that to set the token's text. Override l method to emit +// custom Token objects or provide a Newfactory. +// / +func (b *BaseLexer) Emit() Token { + t := b.factory.Create(b.tokenFactorySourcePair, b.thetype, b.text, b.channel, b.TokenStartCharIndex, b.GetCharIndex()-1, b.TokenStartLine, b.TokenStartColumn) + b.EmitToken(t) + return t +} + +func (b *BaseLexer) EmitEOF() Token { + cpos := b.GetCharPositionInLine() + lpos := b.GetLine() + eof := b.factory.Create(b.tokenFactorySourcePair, TokenEOF, "", TokenDefaultChannel, b.input.Index(), b.input.Index()-1, lpos, cpos) + b.EmitToken(eof) + return eof +} + +func (b *BaseLexer) GetCharPositionInLine() int { + return b.Interpreter.GetCharPositionInLine() +} + +func (b *BaseLexer) GetLine() int { + return b.Interpreter.GetLine() +} + +func (b *BaseLexer) GetType() int { + return b.thetype +} + +func (b *BaseLexer) SetType(t int) { + b.thetype = t +} + +// What is the index of the current character of lookahead?/// +func (b *BaseLexer) GetCharIndex() int { + return b.input.Index() +} + +// Return the text Matched so far for the current token or any text override. +// Set the complete text of l token it wipes any previous changes to the text. +func (b *BaseLexer) GetText() string { + if b.text != "" { + return b.text + } + + return b.Interpreter.GetText(b.input) +} + +func (b *BaseLexer) SetText(text string) { + b.text = text +} + +func (b *BaseLexer) GetATN() *ATN { + return b.Interpreter.ATN() +} + +// Return a list of all Token objects in input char stream. +// Forces load of all tokens. Does not include EOF token. +// / +func (b *BaseLexer) GetAllTokens() []Token { + vl := b.Virt + tokens := make([]Token, 0) + t := vl.NextToken() + for t.GetTokenType() != TokenEOF { + tokens = append(tokens, t) + t = vl.NextToken() + } + return tokens +} + +func (b *BaseLexer) notifyListeners(e RecognitionException) { + start := b.TokenStartCharIndex + stop := b.input.Index() + text := b.input.GetTextFromInterval(NewInterval(start, stop)) + msg := "token recognition error at: '" + text + "'" + listener := b.GetErrorListenerDispatch() + listener.SyntaxError(b, nil, b.TokenStartLine, b.TokenStartColumn, msg, e) +} + +func (b *BaseLexer) getErrorDisplayForChar(c rune) string { + if c == TokenEOF { + return "" + } else if c == '\n' { + return "\\n" + } else if c == '\t' { + return "\\t" + } else if c == '\r' { + return "\\r" + } else { + return string(c) + } +} + +func (b *BaseLexer) getCharErrorDisplay(c rune) string { + return "'" + b.getErrorDisplayForChar(c) + "'" +} + +// Lexers can normally Match any char in it's vocabulary after Matching +// a token, so do the easy thing and just kill a character and hope +// it all works out. You can instead use the rule invocation stack +// to do sophisticated error recovery if you are in a fragment rule. +// / +func (b *BaseLexer) Recover(re RecognitionException) { + if b.input.LA(1) != TokenEOF { + if _, ok := re.(*LexerNoViableAltException); ok { + // Skip a char and try again + b.Interpreter.Consume(b.input) + } else { + // TODO: Do we lose character or line position information? + b.input.Consume() + } + } +} diff --git a/runtime/Go/antlr/v4/lexer_action.go b/runtime/Go/antlr/v4/lexer_action.go new file mode 100644 index 0000000000..111656c295 --- /dev/null +++ b/runtime/Go/antlr/v4/lexer_action.go @@ -0,0 +1,432 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "strconv" + +const ( + LexerActionTypeChannel = 0 //The type of a {@link LexerChannelAction} action. + LexerActionTypeCustom = 1 //The type of a {@link LexerCustomAction} action. + LexerActionTypeMode = 2 //The type of a {@link LexerModeAction} action. + LexerActionTypeMore = 3 //The type of a {@link LexerMoreAction} action. + LexerActionTypePopMode = 4 //The type of a {@link LexerPopModeAction} action. + LexerActionTypePushMode = 5 //The type of a {@link LexerPushModeAction} action. + LexerActionTypeSkip = 6 //The type of a {@link LexerSkipAction} action. + LexerActionTypeType = 7 //The type of a {@link LexerTypeAction} action. +) + +type LexerAction interface { + getActionType() int + getIsPositionDependent() bool + execute(lexer Lexer) + Hash() int + Equals(other LexerAction) bool +} + +type BaseLexerAction struct { + actionType int + isPositionDependent bool +} + +func NewBaseLexerAction(action int) *BaseLexerAction { + la := new(BaseLexerAction) + + la.actionType = action + la.isPositionDependent = false + + return la +} + +func (b *BaseLexerAction) execute(lexer Lexer) { + panic("Not implemented") +} + +func (b *BaseLexerAction) getActionType() int { + return b.actionType +} + +func (b *BaseLexerAction) getIsPositionDependent() bool { + return b.isPositionDependent +} + +func (b *BaseLexerAction) Hash() int { + return b.actionType +} + +func (b *BaseLexerAction) Equals(other LexerAction) bool { + return b == other +} + +// Implements the {@code Skip} lexer action by calling {@link Lexer//Skip}. +// +//

The {@code Skip} command does not have any parameters, so l action is +// implemented as a singleton instance exposed by {@link //INSTANCE}.

+type LexerSkipAction struct { + *BaseLexerAction +} + +func NewLexerSkipAction() *LexerSkipAction { + la := new(LexerSkipAction) + la.BaseLexerAction = NewBaseLexerAction(LexerActionTypeSkip) + return la +} + +// Provides a singleton instance of l parameterless lexer action. +var LexerSkipActionINSTANCE = NewLexerSkipAction() + +func (l *LexerSkipAction) execute(lexer Lexer) { + lexer.Skip() +} + +func (l *LexerSkipAction) String() string { + return "skip" +} + +// Implements the {@code type} lexer action by calling {@link Lexer//setType} +// +// with the assigned type. +type LexerTypeAction struct { + *BaseLexerAction + + thetype int +} + +func NewLexerTypeAction(thetype int) *LexerTypeAction { + l := new(LexerTypeAction) + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypeType) + l.thetype = thetype + return l +} + +func (l *LexerTypeAction) execute(lexer Lexer) { + lexer.SetType(l.thetype) +} + +func (l *LexerTypeAction) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, l.actionType) + h = murmurUpdate(h, l.thetype) + return murmurFinish(h, 2) +} + +func (l *LexerTypeAction) Equals(other LexerAction) bool { + if l == other { + return true + } else if _, ok := other.(*LexerTypeAction); !ok { + return false + } else { + return l.thetype == other.(*LexerTypeAction).thetype + } +} + +func (l *LexerTypeAction) String() string { + return "actionType(" + strconv.Itoa(l.thetype) + ")" +} + +// Implements the {@code pushMode} lexer action by calling +// {@link Lexer//pushMode} with the assigned mode. +type LexerPushModeAction struct { + *BaseLexerAction + + mode int +} + +func NewLexerPushModeAction(mode int) *LexerPushModeAction { + + l := new(LexerPushModeAction) + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypePushMode) + + l.mode = mode + return l +} + +//

This action is implemented by calling {@link Lexer//pushMode} with the +// value provided by {@link //getMode}.

+func (l *LexerPushModeAction) execute(lexer Lexer) { + lexer.PushMode(l.mode) +} + +func (l *LexerPushModeAction) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, l.actionType) + h = murmurUpdate(h, l.mode) + return murmurFinish(h, 2) +} + +func (l *LexerPushModeAction) Equals(other LexerAction) bool { + if l == other { + return true + } else if _, ok := other.(*LexerPushModeAction); !ok { + return false + } else { + return l.mode == other.(*LexerPushModeAction).mode + } +} + +func (l *LexerPushModeAction) String() string { + return "pushMode(" + strconv.Itoa(l.mode) + ")" +} + +// Implements the {@code popMode} lexer action by calling {@link Lexer//popMode}. +// +//

The {@code popMode} command does not have any parameters, so l action is +// implemented as a singleton instance exposed by {@link //INSTANCE}.

+type LexerPopModeAction struct { + *BaseLexerAction +} + +func NewLexerPopModeAction() *LexerPopModeAction { + + l := new(LexerPopModeAction) + + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypePopMode) + + return l +} + +var LexerPopModeActionINSTANCE = NewLexerPopModeAction() + +//

This action is implemented by calling {@link Lexer//popMode}.

+func (l *LexerPopModeAction) execute(lexer Lexer) { + lexer.PopMode() +} + +func (l *LexerPopModeAction) String() string { + return "popMode" +} + +// Implements the {@code more} lexer action by calling {@link Lexer//more}. +// +//

The {@code more} command does not have any parameters, so l action is +// implemented as a singleton instance exposed by {@link //INSTANCE}.

+ +type LexerMoreAction struct { + *BaseLexerAction +} + +func NewLexerMoreAction() *LexerMoreAction { + l := new(LexerMoreAction) + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypeMore) + + return l +} + +var LexerMoreActionINSTANCE = NewLexerMoreAction() + +//

This action is implemented by calling {@link Lexer//popMode}.

+func (l *LexerMoreAction) execute(lexer Lexer) { + lexer.More() +} + +func (l *LexerMoreAction) String() string { + return "more" +} + +// Implements the {@code mode} lexer action by calling {@link Lexer//mode} with +// the assigned mode. +type LexerModeAction struct { + *BaseLexerAction + + mode int +} + +func NewLexerModeAction(mode int) *LexerModeAction { + l := new(LexerModeAction) + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypeMode) + l.mode = mode + return l +} + +//

This action is implemented by calling {@link Lexer//mode} with the +// value provided by {@link //getMode}.

+func (l *LexerModeAction) execute(lexer Lexer) { + lexer.SetMode(l.mode) +} + +func (l *LexerModeAction) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, l.actionType) + h = murmurUpdate(h, l.mode) + return murmurFinish(h, 2) +} + +func (l *LexerModeAction) Equals(other LexerAction) bool { + if l == other { + return true + } else if _, ok := other.(*LexerModeAction); !ok { + return false + } else { + return l.mode == other.(*LexerModeAction).mode + } +} + +func (l *LexerModeAction) String() string { + return "mode(" + strconv.Itoa(l.mode) + ")" +} + +// Executes a custom lexer action by calling {@link Recognizer//action} with the +// rule and action indexes assigned to the custom action. The implementation of +// a custom action is added to the generated code for the lexer in an override +// of {@link Recognizer//action} when the grammar is compiled. +// +//

This class may represent embedded actions created with the {...} +// syntax in ANTLR 4, as well as actions created for lexer commands where the +// command argument could not be evaluated when the grammar was compiled.

+ +// Constructs a custom lexer action with the specified rule and action +// indexes. +// +// @param ruleIndex The rule index to use for calls to +// {@link Recognizer//action}. +// @param actionIndex The action index to use for calls to +// {@link Recognizer//action}. + +type LexerCustomAction struct { + *BaseLexerAction + ruleIndex, actionIndex int +} + +func NewLexerCustomAction(ruleIndex, actionIndex int) *LexerCustomAction { + l := new(LexerCustomAction) + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypeCustom) + l.ruleIndex = ruleIndex + l.actionIndex = actionIndex + l.isPositionDependent = true + return l +} + +//

Custom actions are implemented by calling {@link Lexer//action} with the +// appropriate rule and action indexes.

+func (l *LexerCustomAction) execute(lexer Lexer) { + lexer.Action(nil, l.ruleIndex, l.actionIndex) +} + +func (l *LexerCustomAction) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, l.actionType) + h = murmurUpdate(h, l.ruleIndex) + h = murmurUpdate(h, l.actionIndex) + return murmurFinish(h, 3) +} + +func (l *LexerCustomAction) Equals(other LexerAction) bool { + if l == other { + return true + } else if _, ok := other.(*LexerCustomAction); !ok { + return false + } else { + return l.ruleIndex == other.(*LexerCustomAction).ruleIndex && + l.actionIndex == other.(*LexerCustomAction).actionIndex + } +} + +// Implements the {@code channel} lexer action by calling +// {@link Lexer//setChannel} with the assigned channel. +// Constructs a New{@code channel} action with the specified channel value. +// @param channel The channel value to pass to {@link Lexer//setChannel}. +type LexerChannelAction struct { + *BaseLexerAction + + channel int +} + +func NewLexerChannelAction(channel int) *LexerChannelAction { + l := new(LexerChannelAction) + l.BaseLexerAction = NewBaseLexerAction(LexerActionTypeChannel) + l.channel = channel + return l +} + +//

This action is implemented by calling {@link Lexer//setChannel} with the +// value provided by {@link //getChannel}.

+func (l *LexerChannelAction) execute(lexer Lexer) { + lexer.SetChannel(l.channel) +} + +func (l *LexerChannelAction) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, l.actionType) + h = murmurUpdate(h, l.channel) + return murmurFinish(h, 2) +} + +func (l *LexerChannelAction) Equals(other LexerAction) bool { + if l == other { + return true + } else if _, ok := other.(*LexerChannelAction); !ok { + return false + } else { + return l.channel == other.(*LexerChannelAction).channel + } +} + +func (l *LexerChannelAction) String() string { + return "channel(" + strconv.Itoa(l.channel) + ")" +} + +// This implementation of {@link LexerAction} is used for tracking input offsets +// for position-dependent actions within a {@link LexerActionExecutor}. +// +//

This action is not serialized as part of the ATN, and is only required for +// position-dependent lexer actions which appear at a location other than the +// end of a rule. For more information about DFA optimizations employed for +// lexer actions, see {@link LexerActionExecutor//append} and +// {@link LexerActionExecutor//fixOffsetBeforeMatch}.

+ +// Constructs a Newindexed custom action by associating a character offset +// with a {@link LexerAction}. +// +//

Note: This class is only required for lexer actions for which +// {@link LexerAction//isPositionDependent} returns {@code true}.

+// +// @param offset The offset into the input {@link CharStream}, relative to +// the token start index, at which the specified lexer action should be +// executed. +// @param action The lexer action to execute at a particular offset in the +// input {@link CharStream}. +type LexerIndexedCustomAction struct { + *BaseLexerAction + + offset int + lexerAction LexerAction + isPositionDependent bool +} + +func NewLexerIndexedCustomAction(offset int, lexerAction LexerAction) *LexerIndexedCustomAction { + + l := new(LexerIndexedCustomAction) + l.BaseLexerAction = NewBaseLexerAction(lexerAction.getActionType()) + + l.offset = offset + l.lexerAction = lexerAction + l.isPositionDependent = true + + return l +} + +//

This method calls {@link //execute} on the result of {@link //getAction} +// using the provided {@code lexer}.

+func (l *LexerIndexedCustomAction) execute(lexer Lexer) { + // assume the input stream position was properly set by the calling code + l.lexerAction.execute(lexer) +} + +func (l *LexerIndexedCustomAction) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, l.offset) + h = murmurUpdate(h, l.lexerAction.Hash()) + return murmurFinish(h, 2) +} + +func (l *LexerIndexedCustomAction) equals(other LexerAction) bool { + if l == other { + return true + } else if _, ok := other.(*LexerIndexedCustomAction); !ok { + return false + } else { + return l.offset == other.(*LexerIndexedCustomAction).offset && + l.lexerAction.Equals(other.(*LexerIndexedCustomAction).lexerAction) + } +} diff --git a/runtime/Go/antlr/v4/lexer_action_executor.go b/runtime/Go/antlr/v4/lexer_action_executor.go new file mode 100644 index 0000000000..be1ba7a7e3 --- /dev/null +++ b/runtime/Go/antlr/v4/lexer_action_executor.go @@ -0,0 +1,186 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "golang.org/x/exp/slices" + +// Represents an executor for a sequence of lexer actions which traversed during +// the Matching operation of a lexer rule (token). +// +//

The executor tracks position information for position-dependent lexer actions +// efficiently, ensuring that actions appearing only at the end of the rule do +// not cause bloating of the {@link DFA} created for the lexer.

+ +type LexerActionExecutor struct { + lexerActions []LexerAction + cachedHash int +} + +func NewLexerActionExecutor(lexerActions []LexerAction) *LexerActionExecutor { + + if lexerActions == nil { + lexerActions = make([]LexerAction, 0) + } + + l := new(LexerActionExecutor) + + l.lexerActions = lexerActions + + // Caches the result of {@link //hashCode} since the hash code is an element + // of the performance-critical {@link LexerATNConfig//hashCode} operation. + l.cachedHash = murmurInit(57) + for _, a := range lexerActions { + l.cachedHash = murmurUpdate(l.cachedHash, a.Hash()) + } + + return l +} + +// Creates a {@link LexerActionExecutor} which executes the actions for +// the input {@code lexerActionExecutor} followed by a specified +// {@code lexerAction}. +// +// @param lexerActionExecutor The executor for actions already traversed by +// the lexer while Matching a token within a particular +// {@link LexerATNConfig}. If this is {@code nil}, the method behaves as +// though it were an empty executor. +// @param lexerAction The lexer action to execute after the actions +// specified in {@code lexerActionExecutor}. +// +// @return A {@link LexerActionExecutor} for executing the combine actions +// of {@code lexerActionExecutor} and {@code lexerAction}. +func LexerActionExecutorappend(lexerActionExecutor *LexerActionExecutor, lexerAction LexerAction) *LexerActionExecutor { + if lexerActionExecutor == nil { + return NewLexerActionExecutor([]LexerAction{lexerAction}) + } + + return NewLexerActionExecutor(append(lexerActionExecutor.lexerActions, lexerAction)) +} + +// Creates a {@link LexerActionExecutor} which encodes the current offset +// for position-dependent lexer actions. +// +//

Normally, when the executor encounters lexer actions where +// {@link LexerAction//isPositionDependent} returns {@code true}, it calls +// {@link IntStream//seek} on the input {@link CharStream} to set the input +// position to the end of the current token. This behavior provides +// for efficient DFA representation of lexer actions which appear at the end +// of a lexer rule, even when the lexer rule Matches a variable number of +// characters.

+// +//

Prior to traversing a Match transition in the ATN, the current offset +// from the token start index is assigned to all position-dependent lexer +// actions which have not already been assigned a fixed offset. By storing +// the offsets relative to the token start index, the DFA representation of +// lexer actions which appear in the middle of tokens remains efficient due +// to sharing among tokens of the same length, regardless of their absolute +// position in the input stream.

+// +//

If the current executor already has offsets assigned to all +// position-dependent lexer actions, the method returns {@code this}.

+// +// @param offset The current offset to assign to all position-dependent +// lexer actions which do not already have offsets assigned. +// +// @return A {@link LexerActionExecutor} which stores input stream offsets +// for all position-dependent lexer actions. +// / +func (l *LexerActionExecutor) fixOffsetBeforeMatch(offset int) *LexerActionExecutor { + var updatedLexerActions []LexerAction + for i := 0; i < len(l.lexerActions); i++ { + _, ok := l.lexerActions[i].(*LexerIndexedCustomAction) + if l.lexerActions[i].getIsPositionDependent() && !ok { + if updatedLexerActions == nil { + updatedLexerActions = make([]LexerAction, 0) + + for _, a := range l.lexerActions { + updatedLexerActions = append(updatedLexerActions, a) + } + } + + updatedLexerActions[i] = NewLexerIndexedCustomAction(offset, l.lexerActions[i]) + } + } + if updatedLexerActions == nil { + return l + } + + return NewLexerActionExecutor(updatedLexerActions) +} + +// Execute the actions encapsulated by l executor within the context of a +// particular {@link Lexer}. +// +//

This method calls {@link IntStream//seek} to set the position of the +// {@code input} {@link CharStream} prior to calling +// {@link LexerAction//execute} on a position-dependent action. Before the +// method returns, the input position will be restored to the same position +// it was in when the method was invoked.

+// +// @param lexer The lexer instance. +// @param input The input stream which is the source for the current token. +// When l method is called, the current {@link IntStream//index} for +// {@code input} should be the start of the following token, i.e. 1 +// character past the end of the current token. +// @param startIndex The token start index. This value may be passed to +// {@link IntStream//seek} to set the {@code input} position to the beginning +// of the token. +// / +func (l *LexerActionExecutor) execute(lexer Lexer, input CharStream, startIndex int) { + requiresSeek := false + stopIndex := input.Index() + + defer func() { + if requiresSeek { + input.Seek(stopIndex) + } + }() + + for i := 0; i < len(l.lexerActions); i++ { + lexerAction := l.lexerActions[i] + if la, ok := lexerAction.(*LexerIndexedCustomAction); ok { + offset := la.offset + input.Seek(startIndex + offset) + lexerAction = la.lexerAction + requiresSeek = (startIndex + offset) != stopIndex + } else if lexerAction.getIsPositionDependent() { + input.Seek(stopIndex) + requiresSeek = false + } + lexerAction.execute(lexer) + } +} + +func (l *LexerActionExecutor) Hash() int { + if l == nil { + // TODO: Why is this here? l should not be nil + return 61 + } + + // TODO: This is created from the action itself when the struct is created - will this be an issue at some point? Java uses the runtime assign hashcode + return l.cachedHash +} + +func (l *LexerActionExecutor) Equals(other interface{}) bool { + if l == other { + return true + } + othert, ok := other.(*LexerActionExecutor) + if !ok { + return false + } + if othert == nil { + return false + } + if l.cachedHash != othert.cachedHash { + return false + } + if len(l.lexerActions) != len(othert.lexerActions) { + return false + } + return slices.EqualFunc(l.lexerActions, othert.lexerActions, func(i, j LexerAction) bool { + return i.Equals(j) + }) +} diff --git a/runtime/Go/antlr/v4/lexer_atn_simulator.go b/runtime/Go/antlr/v4/lexer_atn_simulator.go new file mode 100644 index 0000000000..c573b75210 --- /dev/null +++ b/runtime/Go/antlr/v4/lexer_atn_simulator.go @@ -0,0 +1,684 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" + "strings" +) + +var ( + LexerATNSimulatorDebug = false + LexerATNSimulatorDFADebug = false + + LexerATNSimulatorMinDFAEdge = 0 + LexerATNSimulatorMaxDFAEdge = 127 // forces unicode to stay in ATN + + LexerATNSimulatorMatchCalls = 0 +) + +type ILexerATNSimulator interface { + IATNSimulator + + reset() + Match(input CharStream, mode int) int + GetCharPositionInLine() int + GetLine() int + GetText(input CharStream) string + Consume(input CharStream) +} + +type LexerATNSimulator struct { + *BaseATNSimulator + + recog Lexer + predictionMode int + mergeCache DoubleDict + startIndex int + Line int + CharPositionInLine int + mode int + prevAccept *SimState + MatchCalls int +} + +func NewLexerATNSimulator(recog Lexer, atn *ATN, decisionToDFA []*DFA, sharedContextCache *PredictionContextCache) *LexerATNSimulator { + l := new(LexerATNSimulator) + + l.BaseATNSimulator = NewBaseATNSimulator(atn, sharedContextCache) + + l.decisionToDFA = decisionToDFA + l.recog = recog + // The current token's starting index into the character stream. + // Shared across DFA to ATN simulation in case the ATN fails and the + // DFA did not have a previous accept state. In l case, we use the + // ATN-generated exception object. + l.startIndex = -1 + // line number 1..n within the input/// + l.Line = 1 + // The index of the character relative to the beginning of the line + // 0..n-1/// + l.CharPositionInLine = 0 + l.mode = LexerDefaultMode + // Used during DFA/ATN exec to record the most recent accept configuration + // info + l.prevAccept = NewSimState() + // done + return l +} + +func (l *LexerATNSimulator) copyState(simulator *LexerATNSimulator) { + l.CharPositionInLine = simulator.CharPositionInLine + l.Line = simulator.Line + l.mode = simulator.mode + l.startIndex = simulator.startIndex +} + +func (l *LexerATNSimulator) Match(input CharStream, mode int) int { + l.MatchCalls++ + l.mode = mode + mark := input.Mark() + + defer func() { + input.Release(mark) + }() + + l.startIndex = input.Index() + l.prevAccept.reset() + + dfa := l.decisionToDFA[mode] + + var s0 *DFAState + l.atn.stateMu.RLock() + s0 = dfa.getS0() + l.atn.stateMu.RUnlock() + + if s0 == nil { + return l.MatchATN(input) + } + + return l.execATN(input, s0) +} + +func (l *LexerATNSimulator) reset() { + l.prevAccept.reset() + l.startIndex = -1 + l.Line = 1 + l.CharPositionInLine = 0 + l.mode = LexerDefaultMode +} + +func (l *LexerATNSimulator) MatchATN(input CharStream) int { + startState := l.atn.modeToStartState[l.mode] + + if LexerATNSimulatorDebug { + fmt.Println("MatchATN mode " + strconv.Itoa(l.mode) + " start: " + startState.String()) + } + oldMode := l.mode + s0Closure := l.computeStartState(input, startState) + suppressEdge := s0Closure.hasSemanticContext + s0Closure.hasSemanticContext = false + + next := l.addDFAState(s0Closure, suppressEdge) + + predict := l.execATN(input, next) + + if LexerATNSimulatorDebug { + fmt.Println("DFA after MatchATN: " + l.decisionToDFA[oldMode].ToLexerString()) + } + return predict +} + +func (l *LexerATNSimulator) execATN(input CharStream, ds0 *DFAState) int { + + if LexerATNSimulatorDebug { + fmt.Println("start state closure=" + ds0.configs.String()) + } + if ds0.isAcceptState { + // allow zero-length tokens + l.captureSimState(l.prevAccept, input, ds0) + } + t := input.LA(1) + s := ds0 // s is current/from DFA state + + for { // while more work + if LexerATNSimulatorDebug { + fmt.Println("execATN loop starting closure: " + s.configs.String()) + } + + // As we move src->trg, src->trg, we keep track of the previous trg to + // avoid looking up the DFA state again, which is expensive. + // If the previous target was already part of the DFA, we might + // be able to avoid doing a reach operation upon t. If s!=nil, + // it means that semantic predicates didn't prevent us from + // creating a DFA state. Once we know s!=nil, we check to see if + // the DFA state has an edge already for t. If so, we can just reuse + // it's configuration set there's no point in re-computing it. + // This is kind of like doing DFA simulation within the ATN + // simulation because DFA simulation is really just a way to avoid + // computing reach/closure sets. Technically, once we know that + // we have a previously added DFA state, we could jump over to + // the DFA simulator. But, that would mean popping back and forth + // a lot and making things more complicated algorithmically. + // This optimization makes a lot of sense for loops within DFA. + // A character will take us back to an existing DFA state + // that already has lots of edges out of it. e.g., .* in comments. + target := l.getExistingTargetState(s, t) + if target == nil { + target = l.computeTargetState(input, s, t) + // print("Computed:" + str(target)) + } + if target == ATNSimulatorError { + break + } + // If l is a consumable input element, make sure to consume before + // capturing the accept state so the input index, line, and char + // position accurately reflect the state of the interpreter at the + // end of the token. + if t != TokenEOF { + l.Consume(input) + } + if target.isAcceptState { + l.captureSimState(l.prevAccept, input, target) + if t == TokenEOF { + break + } + } + t = input.LA(1) + s = target // flip current DFA target becomes Newsrc/from state + } + + return l.failOrAccept(l.prevAccept, input, s.configs, t) +} + +// Get an existing target state for an edge in the DFA. If the target state +// for the edge has not yet been computed or is otherwise not available, +// l method returns {@code nil}. +// +// @param s The current DFA state +// @param t The next input symbol +// @return The existing target DFA state for the given input symbol +// {@code t}, or {@code nil} if the target state for l edge is not +// already cached +func (l *LexerATNSimulator) getExistingTargetState(s *DFAState, t int) *DFAState { + if t < LexerATNSimulatorMinDFAEdge || t > LexerATNSimulatorMaxDFAEdge { + return nil + } + + l.atn.edgeMu.RLock() + defer l.atn.edgeMu.RUnlock() + if s.getEdges() == nil { + return nil + } + target := s.getIthEdge(t - LexerATNSimulatorMinDFAEdge) + if LexerATNSimulatorDebug && target != nil { + fmt.Println("reuse state " + strconv.Itoa(s.stateNumber) + " edge to " + strconv.Itoa(target.stateNumber)) + } + return target +} + +// Compute a target state for an edge in the DFA, and attempt to add the +// computed state and corresponding edge to the DFA. +// +// @param input The input stream +// @param s The current DFA state +// @param t The next input symbol +// +// @return The computed target DFA state for the given input symbol +// {@code t}. If {@code t} does not lead to a valid DFA state, l method +// returns {@link //ERROR}. +func (l *LexerATNSimulator) computeTargetState(input CharStream, s *DFAState, t int) *DFAState { + reach := NewOrderedATNConfigSet() + + // if we don't find an existing DFA state + // Fill reach starting from closure, following t transitions + l.getReachableConfigSet(input, s.configs, reach.BaseATNConfigSet, t) + + if len(reach.configs) == 0 { // we got nowhere on t from s + if !reach.hasSemanticContext { + // we got nowhere on t, don't panic out l knowledge it'd + // cause a failover from DFA later. + l.addDFAEdge(s, t, ATNSimulatorError, nil) + } + // stop when we can't Match any more char + return ATNSimulatorError + } + // Add an edge from s to target DFA found/created for reach + return l.addDFAEdge(s, t, nil, reach.BaseATNConfigSet) +} + +func (l *LexerATNSimulator) failOrAccept(prevAccept *SimState, input CharStream, reach ATNConfigSet, t int) int { + if l.prevAccept.dfaState != nil { + lexerActionExecutor := prevAccept.dfaState.lexerActionExecutor + l.accept(input, lexerActionExecutor, l.startIndex, prevAccept.index, prevAccept.line, prevAccept.column) + return prevAccept.dfaState.prediction + } + + // if no accept and EOF is first char, return EOF + if t == TokenEOF && input.Index() == l.startIndex { + return TokenEOF + } + + panic(NewLexerNoViableAltException(l.recog, input, l.startIndex, reach)) +} + +// Given a starting configuration set, figure out all ATN configurations +// we can reach upon input {@code t}. Parameter {@code reach} is a return +// parameter. +func (l *LexerATNSimulator) getReachableConfigSet(input CharStream, closure ATNConfigSet, reach ATNConfigSet, t int) { + // l is used to Skip processing for configs which have a lower priority + // than a config that already reached an accept state for the same rule + SkipAlt := ATNInvalidAltNumber + + for _, cfg := range closure.GetItems() { + currentAltReachedAcceptState := (cfg.GetAlt() == SkipAlt) + if currentAltReachedAcceptState && cfg.(*LexerATNConfig).passedThroughNonGreedyDecision { + continue + } + + if LexerATNSimulatorDebug { + + fmt.Printf("testing %s at %s\n", l.GetTokenName(t), cfg.String()) // l.recog, true)) + } + + for _, trans := range cfg.GetState().GetTransitions() { + target := l.getReachableTarget(trans, t) + if target != nil { + lexerActionExecutor := cfg.(*LexerATNConfig).lexerActionExecutor + if lexerActionExecutor != nil { + lexerActionExecutor = lexerActionExecutor.fixOffsetBeforeMatch(input.Index() - l.startIndex) + } + treatEOFAsEpsilon := (t == TokenEOF) + config := NewLexerATNConfig3(cfg.(*LexerATNConfig), target, lexerActionExecutor) + if l.closure(input, config, reach, + currentAltReachedAcceptState, true, treatEOFAsEpsilon) { + // any remaining configs for l alt have a lower priority + // than the one that just reached an accept state. + SkipAlt = cfg.GetAlt() + } + } + } + } +} + +func (l *LexerATNSimulator) accept(input CharStream, lexerActionExecutor *LexerActionExecutor, startIndex, index, line, charPos int) { + if LexerATNSimulatorDebug { + fmt.Printf("ACTION %v\n", lexerActionExecutor) + } + // seek to after last char in token + input.Seek(index) + l.Line = line + l.CharPositionInLine = charPos + if lexerActionExecutor != nil && l.recog != nil { + lexerActionExecutor.execute(l.recog, input, startIndex) + } +} + +func (l *LexerATNSimulator) getReachableTarget(trans Transition, t int) ATNState { + if trans.Matches(t, 0, LexerMaxCharValue) { + return trans.getTarget() + } + + return nil +} + +func (l *LexerATNSimulator) computeStartState(input CharStream, p ATNState) *OrderedATNConfigSet { + configs := NewOrderedATNConfigSet() + for i := 0; i < len(p.GetTransitions()); i++ { + target := p.GetTransitions()[i].getTarget() + cfg := NewLexerATNConfig6(target, i+1, BasePredictionContextEMPTY) + l.closure(input, cfg, configs, false, false, false) + } + + return configs +} + +// Since the alternatives within any lexer decision are ordered by +// preference, l method stops pursuing the closure as soon as an accept +// state is reached. After the first accept state is reached by depth-first +// search from {@code config}, all other (potentially reachable) states for +// l rule would have a lower priority. +// +// @return {@code true} if an accept state is reached, otherwise +// {@code false}. +func (l *LexerATNSimulator) closure(input CharStream, config *LexerATNConfig, configs ATNConfigSet, + currentAltReachedAcceptState, speculative, treatEOFAsEpsilon bool) bool { + + if LexerATNSimulatorDebug { + fmt.Println("closure(" + config.String() + ")") // config.String(l.recog, true) + ")") + } + + _, ok := config.state.(*RuleStopState) + if ok { + + if LexerATNSimulatorDebug { + if l.recog != nil { + fmt.Printf("closure at %s rule stop %s\n", l.recog.GetRuleNames()[config.state.GetRuleIndex()], config) + } else { + fmt.Printf("closure at rule stop %s\n", config) + } + } + + if config.context == nil || config.context.hasEmptyPath() { + if config.context == nil || config.context.isEmpty() { + configs.Add(config, nil) + return true + } + + configs.Add(NewLexerATNConfig2(config, config.state, BasePredictionContextEMPTY), nil) + currentAltReachedAcceptState = true + } + if config.context != nil && !config.context.isEmpty() { + for i := 0; i < config.context.length(); i++ { + if config.context.getReturnState(i) != BasePredictionContextEmptyReturnState { + newContext := config.context.GetParent(i) // "pop" return state + returnState := l.atn.states[config.context.getReturnState(i)] + cfg := NewLexerATNConfig2(config, returnState, newContext) + currentAltReachedAcceptState = l.closure(input, cfg, configs, currentAltReachedAcceptState, speculative, treatEOFAsEpsilon) + } + } + } + return currentAltReachedAcceptState + } + // optimization + if !config.state.GetEpsilonOnlyTransitions() { + if !currentAltReachedAcceptState || !config.passedThroughNonGreedyDecision { + configs.Add(config, nil) + } + } + for j := 0; j < len(config.state.GetTransitions()); j++ { + trans := config.state.GetTransitions()[j] + cfg := l.getEpsilonTarget(input, config, trans, configs, speculative, treatEOFAsEpsilon) + if cfg != nil { + currentAltReachedAcceptState = l.closure(input, cfg, configs, + currentAltReachedAcceptState, speculative, treatEOFAsEpsilon) + } + } + return currentAltReachedAcceptState +} + +// side-effect: can alter configs.hasSemanticContext +func (l *LexerATNSimulator) getEpsilonTarget(input CharStream, config *LexerATNConfig, trans Transition, + configs ATNConfigSet, speculative, treatEOFAsEpsilon bool) *LexerATNConfig { + + var cfg *LexerATNConfig + + if trans.getSerializationType() == TransitionRULE { + + rt := trans.(*RuleTransition) + newContext := SingletonBasePredictionContextCreate(config.context, rt.followState.GetStateNumber()) + cfg = NewLexerATNConfig2(config, trans.getTarget(), newContext) + + } else if trans.getSerializationType() == TransitionPRECEDENCE { + panic("Precedence predicates are not supported in lexers.") + } else if trans.getSerializationType() == TransitionPREDICATE { + // Track traversing semantic predicates. If we traverse, + // we cannot add a DFA state for l "reach" computation + // because the DFA would not test the predicate again in the + // future. Rather than creating collections of semantic predicates + // like v3 and testing them on prediction, v4 will test them on the + // fly all the time using the ATN not the DFA. This is slower but + // semantically it's not used that often. One of the key elements to + // l predicate mechanism is not adding DFA states that see + // predicates immediately afterwards in the ATN. For example, + + // a : ID {p1}? | ID {p2}? + + // should create the start state for rule 'a' (to save start state + // competition), but should not create target of ID state. The + // collection of ATN states the following ID references includes + // states reached by traversing predicates. Since l is when we + // test them, we cannot cash the DFA state target of ID. + + pt := trans.(*PredicateTransition) + + if LexerATNSimulatorDebug { + fmt.Println("EVAL rule " + strconv.Itoa(trans.(*PredicateTransition).ruleIndex) + ":" + strconv.Itoa(pt.predIndex)) + } + configs.SetHasSemanticContext(true) + if l.evaluatePredicate(input, pt.ruleIndex, pt.predIndex, speculative) { + cfg = NewLexerATNConfig4(config, trans.getTarget()) + } + } else if trans.getSerializationType() == TransitionACTION { + if config.context == nil || config.context.hasEmptyPath() { + // execute actions anywhere in the start rule for a token. + // + // TODO: if the entry rule is invoked recursively, some + // actions may be executed during the recursive call. The + // problem can appear when hasEmptyPath() is true but + // isEmpty() is false. In l case, the config needs to be + // split into two contexts - one with just the empty path + // and another with everything but the empty path. + // Unfortunately, the current algorithm does not allow + // getEpsilonTarget to return two configurations, so + // additional modifications are needed before we can support + // the split operation. + lexerActionExecutor := LexerActionExecutorappend(config.lexerActionExecutor, l.atn.lexerActions[trans.(*ActionTransition).actionIndex]) + cfg = NewLexerATNConfig3(config, trans.getTarget(), lexerActionExecutor) + } else { + // ignore actions in referenced rules + cfg = NewLexerATNConfig4(config, trans.getTarget()) + } + } else if trans.getSerializationType() == TransitionEPSILON { + cfg = NewLexerATNConfig4(config, trans.getTarget()) + } else if trans.getSerializationType() == TransitionATOM || + trans.getSerializationType() == TransitionRANGE || + trans.getSerializationType() == TransitionSET { + if treatEOFAsEpsilon { + if trans.Matches(TokenEOF, 0, LexerMaxCharValue) { + cfg = NewLexerATNConfig4(config, trans.getTarget()) + } + } + } + return cfg +} + +// Evaluate a predicate specified in the lexer. +// +//

If {@code speculative} is {@code true}, l method was called before +// {@link //consume} for the Matched character. This method should call +// {@link //consume} before evaluating the predicate to ensure position +// sensitive values, including {@link Lexer//GetText}, {@link Lexer//GetLine}, +// and {@link Lexer//getcolumn}, properly reflect the current +// lexer state. This method should restore {@code input} and the simulator +// to the original state before returning (i.e. undo the actions made by the +// call to {@link //consume}.

+// +// @param input The input stream. +// @param ruleIndex The rule containing the predicate. +// @param predIndex The index of the predicate within the rule. +// @param speculative {@code true} if the current index in {@code input} is +// one character before the predicate's location. +// +// @return {@code true} if the specified predicate evaluates to +// {@code true}. +// / +func (l *LexerATNSimulator) evaluatePredicate(input CharStream, ruleIndex, predIndex int, speculative bool) bool { + // assume true if no recognizer was provided + if l.recog == nil { + return true + } + if !speculative { + return l.recog.Sempred(nil, ruleIndex, predIndex) + } + savedcolumn := l.CharPositionInLine + savedLine := l.Line + index := input.Index() + marker := input.Mark() + + defer func() { + l.CharPositionInLine = savedcolumn + l.Line = savedLine + input.Seek(index) + input.Release(marker) + }() + + l.Consume(input) + return l.recog.Sempred(nil, ruleIndex, predIndex) +} + +func (l *LexerATNSimulator) captureSimState(settings *SimState, input CharStream, dfaState *DFAState) { + settings.index = input.Index() + settings.line = l.Line + settings.column = l.CharPositionInLine + settings.dfaState = dfaState +} + +func (l *LexerATNSimulator) addDFAEdge(from *DFAState, tk int, to *DFAState, cfgs ATNConfigSet) *DFAState { + if to == nil && cfgs != nil { + // leading to l call, ATNConfigSet.hasSemanticContext is used as a + // marker indicating dynamic predicate evaluation makes l edge + // dependent on the specific input sequence, so the static edge in the + // DFA should be omitted. The target DFAState is still created since + // execATN has the ability to reSynchronize with the DFA state cache + // following the predicate evaluation step. + // + // TJP notes: next time through the DFA, we see a pred again and eval. + // If that gets us to a previously created (but dangling) DFA + // state, we can continue in pure DFA mode from there. + // / + suppressEdge := cfgs.HasSemanticContext() + cfgs.SetHasSemanticContext(false) + + to = l.addDFAState(cfgs, true) + + if suppressEdge { + return to + } + } + // add the edge + if tk < LexerATNSimulatorMinDFAEdge || tk > LexerATNSimulatorMaxDFAEdge { + // Only track edges within the DFA bounds + return to + } + if LexerATNSimulatorDebug { + fmt.Println("EDGE " + from.String() + " -> " + to.String() + " upon " + strconv.Itoa(tk)) + } + l.atn.edgeMu.Lock() + defer l.atn.edgeMu.Unlock() + if from.getEdges() == nil { + // make room for tokens 1..n and -1 masquerading as index 0 + from.setEdges(make([]*DFAState, LexerATNSimulatorMaxDFAEdge-LexerATNSimulatorMinDFAEdge+1)) + } + from.setIthEdge(tk-LexerATNSimulatorMinDFAEdge, to) // connect + + return to +} + +// Add a NewDFA state if there isn't one with l set of +// configurations already. This method also detects the first +// configuration containing an ATN rule stop state. Later, when +// traversing the DFA, we will know which rule to accept. +func (l *LexerATNSimulator) addDFAState(configs ATNConfigSet, suppressEdge bool) *DFAState { + + proposed := NewDFAState(-1, configs) + var firstConfigWithRuleStopState ATNConfig + + for _, cfg := range configs.GetItems() { + + _, ok := cfg.GetState().(*RuleStopState) + + if ok { + firstConfigWithRuleStopState = cfg + break + } + } + if firstConfigWithRuleStopState != nil { + proposed.isAcceptState = true + proposed.lexerActionExecutor = firstConfigWithRuleStopState.(*LexerATNConfig).lexerActionExecutor + proposed.setPrediction(l.atn.ruleToTokenType[firstConfigWithRuleStopState.GetState().GetRuleIndex()]) + } + dfa := l.decisionToDFA[l.mode] + + l.atn.stateMu.Lock() + defer l.atn.stateMu.Unlock() + existing, present := dfa.states.Get(proposed) + if present { + + // This state was already present, so just return it. + // + proposed = existing + } else { + + // We need to add the new state + // + proposed.stateNumber = dfa.states.Len() + configs.SetReadOnly(true) + proposed.configs = configs + dfa.states.Put(proposed) + } + if !suppressEdge { + dfa.setS0(proposed) + } + return proposed +} + +func (l *LexerATNSimulator) getDFA(mode int) *DFA { + return l.decisionToDFA[mode] +} + +// Get the text Matched so far for the current token. +func (l *LexerATNSimulator) GetText(input CharStream) string { + // index is first lookahead char, don't include. + return input.GetTextFromInterval(NewInterval(l.startIndex, input.Index()-1)) +} + +func (l *LexerATNSimulator) Consume(input CharStream) { + curChar := input.LA(1) + if curChar == int('\n') { + l.Line++ + l.CharPositionInLine = 0 + } else { + l.CharPositionInLine++ + } + input.Consume() +} + +func (l *LexerATNSimulator) GetCharPositionInLine() int { + return l.CharPositionInLine +} + +func (l *LexerATNSimulator) GetLine() int { + return l.Line +} + +func (l *LexerATNSimulator) GetTokenName(tt int) string { + if tt == -1 { + return "EOF" + } + + var sb strings.Builder + sb.Grow(6) + sb.WriteByte('\'') + sb.WriteRune(rune(tt)) + sb.WriteByte('\'') + + return sb.String() +} + +func resetSimState(sim *SimState) { + sim.index = -1 + sim.line = 0 + sim.column = -1 + sim.dfaState = nil +} + +type SimState struct { + index int + line int + column int + dfaState *DFAState +} + +func NewSimState() *SimState { + s := new(SimState) + resetSimState(s) + return s +} + +func (s *SimState) reset() { + resetSimState(s) +} diff --git a/runtime/Go/antlr/v4/ll1_analyzer.go b/runtime/Go/antlr/v4/ll1_analyzer.go new file mode 100644 index 0000000000..a9e202d041 --- /dev/null +++ b/runtime/Go/antlr/v4/ll1_analyzer.go @@ -0,0 +1,216 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type LL1Analyzer struct { + atn *ATN +} + +func NewLL1Analyzer(atn *ATN) *LL1Analyzer { + la := new(LL1Analyzer) + la.atn = atn + return la +} + +// - Special value added to the lookahead sets to indicate that we hit +// a predicate during analysis if {@code seeThruPreds==false}. +// +// / +const ( + LL1AnalyzerHitPred = TokenInvalidType +) + +// * +// Calculates the SLL(1) expected lookahead set for each outgoing transition +// of an {@link ATNState}. The returned array has one element for each +// outgoing transition in {@code s}. If the closure from transition +// i leads to a semantic predicate before Matching a symbol, the +// element at index i of the result will be {@code nil}. +// +// @param s the ATN state +// @return the expected symbols for each outgoing transition of {@code s}. +func (la *LL1Analyzer) getDecisionLookahead(s ATNState) []*IntervalSet { + if s == nil { + return nil + } + count := len(s.GetTransitions()) + look := make([]*IntervalSet, count) + for alt := 0; alt < count; alt++ { + look[alt] = NewIntervalSet() + lookBusy := NewJStore[ATNConfig, Comparator[ATNConfig]](&ObjEqComparator[ATNConfig]{}) + seeThruPreds := false // fail to get lookahead upon pred + la.look1(s.GetTransitions()[alt].getTarget(), nil, BasePredictionContextEMPTY, look[alt], lookBusy, NewBitSet(), seeThruPreds, false) + // Wipe out lookahead for la alternative if we found nothing + // or we had a predicate when we !seeThruPreds + if look[alt].length() == 0 || look[alt].contains(LL1AnalyzerHitPred) { + look[alt] = nil + } + } + return look +} + +// * +// Compute set of tokens that can follow {@code s} in the ATN in the +// specified {@code ctx}. +// +//

If {@code ctx} is {@code nil} and the end of the rule containing +// {@code s} is reached, {@link Token//EPSILON} is added to the result set. +// If {@code ctx} is not {@code nil} and the end of the outermost rule is +// reached, {@link Token//EOF} is added to the result set.

+// +// @param s the ATN state +// @param stopState the ATN state to stop at. This can be a +// {@link BlockEndState} to detect epsilon paths through a closure. +// @param ctx the complete parser context, or {@code nil} if the context +// should be ignored +// +// @return The set of tokens that can follow {@code s} in the ATN in the +// specified {@code ctx}. +// / +func (la *LL1Analyzer) Look(s, stopState ATNState, ctx RuleContext) *IntervalSet { + r := NewIntervalSet() + seeThruPreds := true // ignore preds get all lookahead + var lookContext PredictionContext + if ctx != nil { + lookContext = predictionContextFromRuleContext(s.GetATN(), ctx) + } + la.look1(s, stopState, lookContext, r, NewJStore[ATNConfig, Comparator[ATNConfig]](&ObjEqComparator[ATNConfig]{}), NewBitSet(), seeThruPreds, true) + return r +} + +//* +// Compute set of tokens that can follow {@code s} in the ATN in the +// specified {@code ctx}. +// +//

If {@code ctx} is {@code nil} and {@code stopState} or the end of the +// rule containing {@code s} is reached, {@link Token//EPSILON} is added to +// the result set. If {@code ctx} is not {@code nil} and {@code addEOF} is +// {@code true} and {@code stopState} or the end of the outermost rule is +// reached, {@link Token//EOF} is added to the result set.

+// +// @param s the ATN state. +// @param stopState the ATN state to stop at. This can be a +// {@link BlockEndState} to detect epsilon paths through a closure. +// @param ctx The outer context, or {@code nil} if the outer context should +// not be used. +// @param look The result lookahead set. +// @param lookBusy A set used for preventing epsilon closures in the ATN +// from causing a stack overflow. Outside code should pass +// {@code NewSet} for la argument. +// @param calledRuleStack A set used for preventing left recursion in the +// ATN from causing a stack overflow. Outside code should pass +// {@code NewBitSet()} for la argument. +// @param seeThruPreds {@code true} to true semantic predicates as +// implicitly {@code true} and "see through them", otherwise {@code false} +// to treat semantic predicates as opaque and add {@link //HitPred} to the +// result if one is encountered. +// @param addEOF Add {@link Token//EOF} to the result if the end of the +// outermost context is reached. This parameter has no effect if {@code ctx} +// is {@code nil}. + +func (la *LL1Analyzer) look2(s, stopState ATNState, ctx PredictionContext, look *IntervalSet, lookBusy *JStore[ATNConfig, Comparator[ATNConfig]], calledRuleStack *BitSet, seeThruPreds, addEOF bool, i int) { + + returnState := la.atn.states[ctx.getReturnState(i)] + la.look1(returnState, stopState, ctx.GetParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF) + +} + +func (la *LL1Analyzer) look1(s, stopState ATNState, ctx PredictionContext, look *IntervalSet, lookBusy *JStore[ATNConfig, Comparator[ATNConfig]], calledRuleStack *BitSet, seeThruPreds, addEOF bool) { + + c := NewBaseATNConfig6(s, 0, ctx) + + if lookBusy.Contains(c) { + return + } + + _, present := lookBusy.Put(c) + if present { + return + + } + if s == stopState { + if ctx == nil { + look.addOne(TokenEpsilon) + return + } else if ctx.isEmpty() && addEOF { + look.addOne(TokenEOF) + return + } + } + + _, ok := s.(*RuleStopState) + + if ok { + if ctx == nil { + look.addOne(TokenEpsilon) + return + } else if ctx.isEmpty() && addEOF { + look.addOne(TokenEOF) + return + } + + if ctx != BasePredictionContextEMPTY { + removed := calledRuleStack.contains(s.GetRuleIndex()) + defer func() { + if removed { + calledRuleStack.add(s.GetRuleIndex()) + } + }() + calledRuleStack.remove(s.GetRuleIndex()) + // run thru all possible stack tops in ctx + for i := 0; i < ctx.length(); i++ { + returnState := la.atn.states[ctx.getReturnState(i)] + la.look2(returnState, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF, i) + } + return + } + } + + n := len(s.GetTransitions()) + + for i := 0; i < n; i++ { + t := s.GetTransitions()[i] + + if t1, ok := t.(*RuleTransition); ok { + if calledRuleStack.contains(t1.getTarget().GetRuleIndex()) { + continue + } + + newContext := SingletonBasePredictionContextCreate(ctx, t1.followState.GetStateNumber()) + la.look3(stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF, t1) + } else if t2, ok := t.(AbstractPredicateTransition); ok { + if seeThruPreds { + la.look1(t2.getTarget(), stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF) + } else { + look.addOne(LL1AnalyzerHitPred) + } + } else if t.getIsEpsilon() { + la.look1(t.getTarget(), stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF) + } else if _, ok := t.(*WildcardTransition); ok { + look.addRange(TokenMinUserTokenType, la.atn.maxTokenType) + } else { + set := t.getLabel() + if set != nil { + if _, ok := t.(*NotSetTransition); ok { + set = set.complement(TokenMinUserTokenType, la.atn.maxTokenType) + } + look.addSet(set) + } + } + } +} + +func (la *LL1Analyzer) look3(stopState ATNState, ctx PredictionContext, look *IntervalSet, lookBusy *JStore[ATNConfig, Comparator[ATNConfig]], calledRuleStack *BitSet, seeThruPreds, addEOF bool, t1 *RuleTransition) { + + newContext := SingletonBasePredictionContextCreate(ctx, t1.followState.GetStateNumber()) + + defer func() { + calledRuleStack.remove(t1.getTarget().GetRuleIndex()) + }() + + calledRuleStack.add(t1.getTarget().GetRuleIndex()) + la.look1(t1.getTarget(), stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF) + +} diff --git a/runtime/Go/antlr/v4/parser.go b/runtime/Go/antlr/v4/parser.go new file mode 100644 index 0000000000..d26bf06392 --- /dev/null +++ b/runtime/Go/antlr/v4/parser.go @@ -0,0 +1,708 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" +) + +type Parser interface { + Recognizer + + GetInterpreter() *ParserATNSimulator + + GetTokenStream() TokenStream + GetTokenFactory() TokenFactory + GetParserRuleContext() ParserRuleContext + SetParserRuleContext(ParserRuleContext) + Consume() Token + GetParseListeners() []ParseTreeListener + + GetErrorHandler() ErrorStrategy + SetErrorHandler(ErrorStrategy) + GetInputStream() IntStream + GetCurrentToken() Token + GetExpectedTokens() *IntervalSet + NotifyErrorListeners(string, Token, RecognitionException) + IsExpectedToken(int) bool + GetPrecedence() int + GetRuleInvocationStack(ParserRuleContext) []string +} + +type BaseParser struct { + *BaseRecognizer + + Interpreter *ParserATNSimulator + BuildParseTrees bool + + input TokenStream + errHandler ErrorStrategy + precedenceStack IntStack + ctx ParserRuleContext + + tracer *TraceListener + parseListeners []ParseTreeListener + _SyntaxErrors int +} + +// p.is all the parsing support code essentially most of it is error +// recovery stuff.// +func NewBaseParser(input TokenStream) *BaseParser { + + p := new(BaseParser) + + p.BaseRecognizer = NewBaseRecognizer() + + // The input stream. + p.input = nil + // The error handling strategy for the parser. The default value is a new + // instance of {@link DefaultErrorStrategy}. + p.errHandler = NewDefaultErrorStrategy() + p.precedenceStack = make([]int, 0) + p.precedenceStack.Push(0) + // The {@link ParserRuleContext} object for the currently executing rule. + // p.is always non-nil during the parsing process. + p.ctx = nil + // Specifies whether or not the parser should construct a parse tree during + // the parsing process. The default value is {@code true}. + p.BuildParseTrees = true + // When {@link //setTrace}{@code (true)} is called, a reference to the + // {@link TraceListener} is stored here so it can be easily removed in a + // later call to {@link //setTrace}{@code (false)}. The listener itself is + // implemented as a parser listener so p.field is not directly used by + // other parser methods. + p.tracer = nil + // The list of {@link ParseTreeListener} listeners registered to receive + // events during the parse. + p.parseListeners = nil + // The number of syntax errors Reported during parsing. p.value is + // incremented each time {@link //NotifyErrorListeners} is called. + p._SyntaxErrors = 0 + p.SetInputStream(input) + + return p +} + +// p.field maps from the serialized ATN string to the deserialized {@link +// ATN} with +// bypass alternatives. +// +// @see ATNDeserializationOptions//isGenerateRuleBypassTransitions() +var bypassAltsAtnCache = make(map[string]int) + +// reset the parser's state// +func (p *BaseParser) reset() { + if p.input != nil { + p.input.Seek(0) + } + p.errHandler.reset(p) + p.ctx = nil + p._SyntaxErrors = 0 + p.SetTrace(nil) + p.precedenceStack = make([]int, 0) + p.precedenceStack.Push(0) + if p.Interpreter != nil { + p.Interpreter.reset() + } +} + +func (p *BaseParser) GetErrorHandler() ErrorStrategy { + return p.errHandler +} + +func (p *BaseParser) SetErrorHandler(e ErrorStrategy) { + p.errHandler = e +} + +// Match current input symbol against {@code ttype}. If the symbol type +// Matches, {@link ANTLRErrorStrategy//ReportMatch} and {@link //consume} are +// called to complete the Match process. +// +//

If the symbol type does not Match, +// {@link ANTLRErrorStrategy//recoverInline} is called on the current error +// strategy to attempt recovery. If {@link //getBuildParseTree} is +// {@code true} and the token index of the symbol returned by +// {@link ANTLRErrorStrategy//recoverInline} is -1, the symbol is added to +// the parse tree by calling {@link ParserRuleContext//addErrorNode}.

+// +// @param ttype the token type to Match +// @return the Matched symbol +// @panics RecognitionException if the current input symbol did not Match +// {@code ttype} and the error strategy could not recover from the +// mismatched symbol + +func (p *BaseParser) Match(ttype int) Token { + + t := p.GetCurrentToken() + + if t.GetTokenType() == ttype { + p.errHandler.ReportMatch(p) + p.Consume() + } else { + t = p.errHandler.RecoverInline(p) + if p.BuildParseTrees && t.GetTokenIndex() == -1 { + // we must have conjured up a Newtoken during single token + // insertion + // if it's not the current symbol + p.ctx.AddErrorNode(t) + } + } + + return t +} + +// Match current input symbol as a wildcard. If the symbol type Matches +// (i.e. has a value greater than 0), {@link ANTLRErrorStrategy//ReportMatch} +// and {@link //consume} are called to complete the Match process. +// +//

If the symbol type does not Match, +// {@link ANTLRErrorStrategy//recoverInline} is called on the current error +// strategy to attempt recovery. If {@link //getBuildParseTree} is +// {@code true} and the token index of the symbol returned by +// {@link ANTLRErrorStrategy//recoverInline} is -1, the symbol is added to +// the parse tree by calling {@link ParserRuleContext//addErrorNode}.

+// +// @return the Matched symbol +// @panics RecognitionException if the current input symbol did not Match +// a wildcard and the error strategy could not recover from the mismatched +// symbol + +func (p *BaseParser) MatchWildcard() Token { + t := p.GetCurrentToken() + if t.GetTokenType() > 0 { + p.errHandler.ReportMatch(p) + p.Consume() + } else { + t = p.errHandler.RecoverInline(p) + if p.BuildParseTrees && t.GetTokenIndex() == -1 { + // we must have conjured up a Newtoken during single token + // insertion + // if it's not the current symbol + p.ctx.AddErrorNode(t) + } + } + return t +} + +func (p *BaseParser) GetParserRuleContext() ParserRuleContext { + return p.ctx +} + +func (p *BaseParser) SetParserRuleContext(v ParserRuleContext) { + p.ctx = v +} + +func (p *BaseParser) GetParseListeners() []ParseTreeListener { + if p.parseListeners == nil { + return make([]ParseTreeListener, 0) + } + return p.parseListeners +} + +// Registers {@code listener} to receive events during the parsing process. +// +//

To support output-preserving grammar transformations (including but not +// limited to left-recursion removal, automated left-factoring, and +// optimized code generation), calls to listener methods during the parse +// may differ substantially from calls made by +// {@link ParseTreeWalker//DEFAULT} used after the parse is complete. In +// particular, rule entry and exit events may occur in a different order +// during the parse than after the parser. In addition, calls to certain +// rule entry methods may be omitted.

+// +//

With the following specific exceptions, calls to listener events are +// deterministic, i.e. for identical input the calls to listener +// methods will be the same.

+// +//
    +//
  • Alterations to the grammar used to generate code may change the +// behavior of the listener calls.
  • +//
  • Alterations to the command line options passed to ANTLR 4 when +// generating the parser may change the behavior of the listener calls.
  • +//
  • Changing the version of the ANTLR Tool used to generate the parser +// may change the behavior of the listener calls.
  • +//
+// +// @param listener the listener to add +// +// @panics nilPointerException if {@code} listener is {@code nil} +func (p *BaseParser) AddParseListener(listener ParseTreeListener) { + if listener == nil { + panic("listener") + } + if p.parseListeners == nil { + p.parseListeners = make([]ParseTreeListener, 0) + } + p.parseListeners = append(p.parseListeners, listener) +} + +// Remove {@code listener} from the list of parse listeners. +// +//

If {@code listener} is {@code nil} or has not been added as a parse +// listener, p.method does nothing.

+// @param listener the listener to remove +func (p *BaseParser) RemoveParseListener(listener ParseTreeListener) { + + if p.parseListeners != nil { + + idx := -1 + for i, v := range p.parseListeners { + if v == listener { + idx = i + break + } + } + + if idx == -1 { + return + } + + // remove the listener from the slice + p.parseListeners = append(p.parseListeners[0:idx], p.parseListeners[idx+1:]...) + + if len(p.parseListeners) == 0 { + p.parseListeners = nil + } + } +} + +// Remove all parse listeners. +func (p *BaseParser) removeParseListeners() { + p.parseListeners = nil +} + +// Notify any parse listeners of an enter rule event. +func (p *BaseParser) TriggerEnterRuleEvent() { + if p.parseListeners != nil { + ctx := p.ctx + for _, listener := range p.parseListeners { + listener.EnterEveryRule(ctx) + ctx.EnterRule(listener) + } + } +} + +// Notify any parse listeners of an exit rule event. +// +// @see //addParseListener +func (p *BaseParser) TriggerExitRuleEvent() { + if p.parseListeners != nil { + // reverse order walk of listeners + ctx := p.ctx + l := len(p.parseListeners) - 1 + + for i := range p.parseListeners { + listener := p.parseListeners[l-i] + ctx.ExitRule(listener) + listener.ExitEveryRule(ctx) + } + } +} + +func (p *BaseParser) GetInterpreter() *ParserATNSimulator { + return p.Interpreter +} + +func (p *BaseParser) GetATN() *ATN { + return p.Interpreter.atn +} + +func (p *BaseParser) GetTokenFactory() TokenFactory { + return p.input.GetTokenSource().GetTokenFactory() +} + +// Tell our token source and error strategy about a Newway to create tokens.// +func (p *BaseParser) setTokenFactory(factory TokenFactory) { + p.input.GetTokenSource().setTokenFactory(factory) +} + +// The ATN with bypass alternatives is expensive to create so we create it +// lazily. +// +// @panics UnsupportedOperationException if the current parser does not +// implement the {@link //getSerializedATN()} method. +func (p *BaseParser) GetATNWithBypassAlts() { + + // TODO + panic("Not implemented!") + + // serializedAtn := p.getSerializedATN() + // if (serializedAtn == nil) { + // panic("The current parser does not support an ATN with bypass alternatives.") + // } + // result := p.bypassAltsAtnCache[serializedAtn] + // if (result == nil) { + // deserializationOptions := NewATNDeserializationOptions(nil) + // deserializationOptions.generateRuleBypassTransitions = true + // result = NewATNDeserializer(deserializationOptions).deserialize(serializedAtn) + // p.bypassAltsAtnCache[serializedAtn] = result + // } + // return result +} + +// The preferred method of getting a tree pattern. For example, here's a +// sample use: +// +//
+// ParseTree t = parser.expr()
+// ParseTreePattern p = parser.compileParseTreePattern("<ID>+0",
+// MyParser.RULE_expr)
+// ParseTreeMatch m = p.Match(t)
+// String id = m.Get("ID")
+// 
+ +func (p *BaseParser) compileParseTreePattern(pattern, patternRuleIndex, lexer Lexer) { + + panic("NewParseTreePatternMatcher not implemented!") + // + // if (lexer == nil) { + // if (p.GetTokenStream() != nil) { + // tokenSource := p.GetTokenStream().GetTokenSource() + // if _, ok := tokenSource.(ILexer); ok { + // lexer = tokenSource + // } + // } + // } + // if (lexer == nil) { + // panic("Parser can't discover a lexer to use") + // } + + // m := NewParseTreePatternMatcher(lexer, p) + // return m.compile(pattern, patternRuleIndex) +} + +func (p *BaseParser) GetInputStream() IntStream { + return p.GetTokenStream() +} + +func (p *BaseParser) SetInputStream(input TokenStream) { + p.SetTokenStream(input) +} + +func (p *BaseParser) GetTokenStream() TokenStream { + return p.input +} + +// Set the token stream and reset the parser.// +func (p *BaseParser) SetTokenStream(input TokenStream) { + p.input = nil + p.reset() + p.input = input +} + +// Match needs to return the current input symbol, which gets put +// into the label for the associated token ref e.g., x=ID. +func (p *BaseParser) GetCurrentToken() Token { + return p.input.LT(1) +} + +func (p *BaseParser) NotifyErrorListeners(msg string, offendingToken Token, err RecognitionException) { + if offendingToken == nil { + offendingToken = p.GetCurrentToken() + } + p._SyntaxErrors++ + line := offendingToken.GetLine() + column := offendingToken.GetColumn() + listener := p.GetErrorListenerDispatch() + listener.SyntaxError(p, offendingToken, line, column, msg, err) +} + +func (p *BaseParser) Consume() Token { + o := p.GetCurrentToken() + if o.GetTokenType() != TokenEOF { + p.GetInputStream().Consume() + } + hasListener := p.parseListeners != nil && len(p.parseListeners) > 0 + if p.BuildParseTrees || hasListener { + if p.errHandler.InErrorRecoveryMode(p) { + node := p.ctx.AddErrorNode(o) + if p.parseListeners != nil { + for _, l := range p.parseListeners { + l.VisitErrorNode(node) + } + } + + } else { + node := p.ctx.AddTokenNode(o) + if p.parseListeners != nil { + for _, l := range p.parseListeners { + l.VisitTerminal(node) + } + } + } + // node.invokingState = p.state + } + + return o +} + +func (p *BaseParser) addContextToParseTree() { + // add current context to parent if we have a parent + if p.ctx.GetParent() != nil { + p.ctx.GetParent().(ParserRuleContext).AddChild(p.ctx) + } +} + +func (p *BaseParser) EnterRule(localctx ParserRuleContext, state, ruleIndex int) { + p.SetState(state) + p.ctx = localctx + p.ctx.SetStart(p.input.LT(1)) + if p.BuildParseTrees { + p.addContextToParseTree() + } + if p.parseListeners != nil { + p.TriggerEnterRuleEvent() + } +} + +func (p *BaseParser) ExitRule() { + p.ctx.SetStop(p.input.LT(-1)) + // trigger event on ctx, before it reverts to parent + if p.parseListeners != nil { + p.TriggerExitRuleEvent() + } + p.SetState(p.ctx.GetInvokingState()) + if p.ctx.GetParent() != nil { + p.ctx = p.ctx.GetParent().(ParserRuleContext) + } else { + p.ctx = nil + } +} + +func (p *BaseParser) EnterOuterAlt(localctx ParserRuleContext, altNum int) { + localctx.SetAltNumber(altNum) + // if we have Newlocalctx, make sure we replace existing ctx + // that is previous child of parse tree + if p.BuildParseTrees && p.ctx != localctx { + if p.ctx.GetParent() != nil { + p.ctx.GetParent().(ParserRuleContext).RemoveLastChild() + p.ctx.GetParent().(ParserRuleContext).AddChild(localctx) + } + } + p.ctx = localctx +} + +// Get the precedence level for the top-most precedence rule. +// +// @return The precedence level for the top-most precedence rule, or -1 if +// the parser context is not nested within a precedence rule. + +func (p *BaseParser) GetPrecedence() int { + if len(p.precedenceStack) == 0 { + return -1 + } + + return p.precedenceStack[len(p.precedenceStack)-1] +} + +func (p *BaseParser) EnterRecursionRule(localctx ParserRuleContext, state, ruleIndex, precedence int) { + p.SetState(state) + p.precedenceStack.Push(precedence) + p.ctx = localctx + p.ctx.SetStart(p.input.LT(1)) + if p.parseListeners != nil { + p.TriggerEnterRuleEvent() // simulates rule entry for + // left-recursive rules + } +} + +// +// Like {@link //EnterRule} but for recursive rules. + +func (p *BaseParser) PushNewRecursionContext(localctx ParserRuleContext, state, ruleIndex int) { + previous := p.ctx + previous.SetParent(localctx) + previous.SetInvokingState(state) + previous.SetStop(p.input.LT(-1)) + + p.ctx = localctx + p.ctx.SetStart(previous.GetStart()) + if p.BuildParseTrees { + p.ctx.AddChild(previous) + } + if p.parseListeners != nil { + p.TriggerEnterRuleEvent() // simulates rule entry for + // left-recursive rules + } +} + +func (p *BaseParser) UnrollRecursionContexts(parentCtx ParserRuleContext) { + p.precedenceStack.Pop() + p.ctx.SetStop(p.input.LT(-1)) + retCtx := p.ctx // save current ctx (return value) + // unroll so ctx is as it was before call to recursive method + if p.parseListeners != nil { + for p.ctx != parentCtx { + p.TriggerExitRuleEvent() + p.ctx = p.ctx.GetParent().(ParserRuleContext) + } + } else { + p.ctx = parentCtx + } + // hook into tree + retCtx.SetParent(parentCtx) + if p.BuildParseTrees && parentCtx != nil { + // add return ctx into invoking rule's tree + parentCtx.AddChild(retCtx) + } +} + +func (p *BaseParser) GetInvokingContext(ruleIndex int) ParserRuleContext { + ctx := p.ctx + for ctx != nil { + if ctx.GetRuleIndex() == ruleIndex { + return ctx + } + ctx = ctx.GetParent().(ParserRuleContext) + } + return nil +} + +func (p *BaseParser) Precpred(localctx RuleContext, precedence int) bool { + return precedence >= p.precedenceStack[len(p.precedenceStack)-1] +} + +func (p *BaseParser) inContext(context ParserRuleContext) bool { + // TODO: useful in parser? + return false +} + +// +// Checks whether or not {@code symbol} can follow the current state in the +// ATN. The behavior of p.method is equivalent to the following, but is +// implemented such that the complete context-sensitive follow set does not +// need to be explicitly constructed. +// +//
+// return getExpectedTokens().contains(symbol)
+// 
+// +// @param symbol the symbol type to check +// @return {@code true} if {@code symbol} can follow the current state in +// the ATN, otherwise {@code false}. + +func (p *BaseParser) IsExpectedToken(symbol int) bool { + atn := p.Interpreter.atn + ctx := p.ctx + s := atn.states[p.state] + following := atn.NextTokens(s, nil) + if following.contains(symbol) { + return true + } + if !following.contains(TokenEpsilon) { + return false + } + for ctx != nil && ctx.GetInvokingState() >= 0 && following.contains(TokenEpsilon) { + invokingState := atn.states[ctx.GetInvokingState()] + rt := invokingState.GetTransitions()[0] + following = atn.NextTokens(rt.(*RuleTransition).followState, nil) + if following.contains(symbol) { + return true + } + ctx = ctx.GetParent().(ParserRuleContext) + } + if following.contains(TokenEpsilon) && symbol == TokenEOF { + return true + } + + return false +} + +// Computes the set of input symbols which could follow the current parser +// state and context, as given by {@link //GetState} and {@link //GetContext}, +// respectively. +// +// @see ATN//getExpectedTokens(int, RuleContext) +func (p *BaseParser) GetExpectedTokens() *IntervalSet { + return p.Interpreter.atn.getExpectedTokens(p.state, p.ctx) +} + +func (p *BaseParser) GetExpectedTokensWithinCurrentRule() *IntervalSet { + atn := p.Interpreter.atn + s := atn.states[p.state] + return atn.NextTokens(s, nil) +} + +// Get a rule's index (i.e., {@code RULE_ruleName} field) or -1 if not found.// +func (p *BaseParser) GetRuleIndex(ruleName string) int { + var ruleIndex, ok = p.GetRuleIndexMap()[ruleName] + if ok { + return ruleIndex + } + + return -1 +} + +// Return List<String> of the rule names in your parser instance +// leading up to a call to the current rule. You could override if +// you want more details such as the file/line info of where +// in the ATN a rule is invoked. +// +// this very useful for error messages. + +func (p *BaseParser) GetRuleInvocationStack(c ParserRuleContext) []string { + if c == nil { + c = p.ctx + } + stack := make([]string, 0) + for c != nil { + // compute what follows who invoked us + ruleIndex := c.GetRuleIndex() + if ruleIndex < 0 { + stack = append(stack, "n/a") + } else { + stack = append(stack, p.GetRuleNames()[ruleIndex]) + } + + vp := c.GetParent() + + if vp == nil { + break + } + + c = vp.(ParserRuleContext) + } + return stack +} + +// For debugging and other purposes.// +func (p *BaseParser) GetDFAStrings() string { + return fmt.Sprint(p.Interpreter.decisionToDFA) +} + +// For debugging and other purposes.// +func (p *BaseParser) DumpDFA() { + seenOne := false + for _, dfa := range p.Interpreter.decisionToDFA { + if dfa.states.Len() > 0 { + if seenOne { + fmt.Println() + } + fmt.Println("Decision " + strconv.Itoa(dfa.decision) + ":") + fmt.Print(dfa.String(p.LiteralNames, p.SymbolicNames)) + seenOne = true + } + } +} + +func (p *BaseParser) GetSourceName() string { + return p.GrammarFileName +} + +// During a parse is sometimes useful to listen in on the rule entry and exit +// events as well as token Matches. p.is for quick and dirty debugging. +func (p *BaseParser) SetTrace(trace *TraceListener) { + if trace == nil { + p.RemoveParseListener(p.tracer) + p.tracer = nil + } else { + if p.tracer != nil { + p.RemoveParseListener(p.tracer) + } + p.tracer = NewTraceListener(p) + p.AddParseListener(p.tracer) + } +} diff --git a/runtime/Go/antlr/v4/parser_atn_simulator.go b/runtime/Go/antlr/v4/parser_atn_simulator.go new file mode 100644 index 0000000000..c780e3c5f2 --- /dev/null +++ b/runtime/Go/antlr/v4/parser_atn_simulator.go @@ -0,0 +1,1549 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" + "strings" +) + +var ( + ParserATNSimulatorDebug = false + ParserATNSimulatorListATNDecisions = false + ParserATNSimulatorDFADebug = false + ParserATNSimulatorRetryDebug = false + TurnOffLRLoopEntryBranchOpt = false +) + +type ParserATNSimulator struct { + *BaseATNSimulator + + parser Parser + predictionMode int + input TokenStream + startIndex int + dfa *DFA + mergeCache *DoubleDict + outerContext ParserRuleContext +} + +func NewParserATNSimulator(parser Parser, atn *ATN, decisionToDFA []*DFA, sharedContextCache *PredictionContextCache) *ParserATNSimulator { + + p := new(ParserATNSimulator) + + p.BaseATNSimulator = NewBaseATNSimulator(atn, sharedContextCache) + + p.parser = parser + p.decisionToDFA = decisionToDFA + // SLL, LL, or LL + exact ambig detection?// + p.predictionMode = PredictionModeLL + // LAME globals to avoid parameters!!!!! I need these down deep in predTransition + p.input = nil + p.startIndex = 0 + p.outerContext = nil + p.dfa = nil + // Each prediction operation uses a cache for merge of prediction contexts. + // Don't keep around as it wastes huge amounts of memory. DoubleKeyMap + // isn't Synchronized but we're ok since two threads shouldn't reuse same + // parser/atnsim object because it can only handle one input at a time. + // This maps graphs a and b to merged result c. (a,b)&rarrc. We can avoid + // the merge if we ever see a and b again. Note that (b,a)&rarrc should + // also be examined during cache lookup. + // + p.mergeCache = nil + + return p +} + +func (p *ParserATNSimulator) GetPredictionMode() int { + return p.predictionMode +} + +func (p *ParserATNSimulator) SetPredictionMode(v int) { + p.predictionMode = v +} + +func (p *ParserATNSimulator) reset() { +} + +func (p *ParserATNSimulator) AdaptivePredict(input TokenStream, decision int, outerContext ParserRuleContext) int { + if ParserATNSimulatorDebug || ParserATNSimulatorListATNDecisions { + fmt.Println("AdaptivePredict decision " + strconv.Itoa(decision) + + " exec LA(1)==" + p.getLookaheadName(input) + + " line " + strconv.Itoa(input.LT(1).GetLine()) + ":" + + strconv.Itoa(input.LT(1).GetColumn())) + } + + p.input = input + p.startIndex = input.Index() + p.outerContext = outerContext + + dfa := p.decisionToDFA[decision] + p.dfa = dfa + m := input.Mark() + index := input.Index() + + defer func() { + p.dfa = nil + p.mergeCache = nil // wack cache after each prediction + input.Seek(index) + input.Release(m) + }() + + // Now we are certain to have a specific decision's DFA + // But, do we still need an initial state? + var s0 *DFAState + p.atn.stateMu.RLock() + if dfa.getPrecedenceDfa() { + p.atn.edgeMu.RLock() + // the start state for a precedence DFA depends on the current + // parser precedence, and is provided by a DFA method. + s0 = dfa.getPrecedenceStartState(p.parser.GetPrecedence()) + p.atn.edgeMu.RUnlock() + } else { + // the start state for a "regular" DFA is just s0 + s0 = dfa.getS0() + } + p.atn.stateMu.RUnlock() + + if s0 == nil { + if outerContext == nil { + outerContext = ParserRuleContextEmpty + } + if ParserATNSimulatorDebug || ParserATNSimulatorListATNDecisions { + fmt.Println("predictATN decision " + strconv.Itoa(dfa.decision) + + " exec LA(1)==" + p.getLookaheadName(input) + + ", outerContext=" + outerContext.String(p.parser.GetRuleNames(), nil)) + } + fullCtx := false + s0Closure := p.computeStartState(dfa.atnStartState, ParserRuleContextEmpty, fullCtx) + + p.atn.stateMu.Lock() + if dfa.getPrecedenceDfa() { + // If p is a precedence DFA, we use applyPrecedenceFilter + // to convert the computed start state to a precedence start + // state. We then use DFA.setPrecedenceStartState to set the + // appropriate start state for the precedence level rather + // than simply setting DFA.s0. + // + dfa.s0.configs = s0Closure + s0Closure = p.applyPrecedenceFilter(s0Closure) + s0 = p.addDFAState(dfa, NewDFAState(-1, s0Closure)) + p.atn.edgeMu.Lock() + dfa.setPrecedenceStartState(p.parser.GetPrecedence(), s0) + p.atn.edgeMu.Unlock() + } else { + s0 = p.addDFAState(dfa, NewDFAState(-1, s0Closure)) + dfa.setS0(s0) + } + p.atn.stateMu.Unlock() + } + + alt := p.execATN(dfa, s0, input, index, outerContext) + if ParserATNSimulatorDebug { + fmt.Println("DFA after predictATN: " + dfa.String(p.parser.GetLiteralNames(), nil)) + } + return alt + +} + +// Performs ATN simulation to compute a predicted alternative based +// upon the remaining input, but also updates the DFA cache to avoid +// having to traverse the ATN again for the same input sequence. + +// There are some key conditions we're looking for after computing a new +// set of ATN configs (proposed DFA state): +// if the set is empty, there is no viable alternative for current symbol +// does the state uniquely predict an alternative? +// does the state have a conflict that would prevent us from +// putting it on the work list? + +// We also have some key operations to do: +// add an edge from previous DFA state to potentially NewDFA state, D, +// upon current symbol but only if adding to work list, which means in all +// cases except no viable alternative (and possibly non-greedy decisions?) +// collecting predicates and adding semantic context to DFA accept states +// adding rule context to context-sensitive DFA accept states +// consuming an input symbol +// Reporting a conflict +// Reporting an ambiguity +// Reporting a context sensitivity +// Reporting insufficient predicates + +// cover these cases: +// +// dead end +// single alt +// single alt + preds +// conflict +// conflict + preds +func (p *ParserATNSimulator) execATN(dfa *DFA, s0 *DFAState, input TokenStream, startIndex int, outerContext ParserRuleContext) int { + + if ParserATNSimulatorDebug || ParserATNSimulatorListATNDecisions { + fmt.Println("execATN decision " + strconv.Itoa(dfa.decision) + + " exec LA(1)==" + p.getLookaheadName(input) + + " line " + strconv.Itoa(input.LT(1).GetLine()) + ":" + strconv.Itoa(input.LT(1).GetColumn())) + } + + previousD := s0 + + if ParserATNSimulatorDebug { + fmt.Println("s0 = " + s0.String()) + } + t := input.LA(1) + for { // for more work + D := p.getExistingTargetState(previousD, t) + if D == nil { + D = p.computeTargetState(dfa, previousD, t) + } + if D == ATNSimulatorError { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for SLL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. + e := p.noViableAlt(input, outerContext, previousD.configs, startIndex) + input.Seek(startIndex) + alt := p.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configs, outerContext) + if alt != ATNInvalidAltNumber { + return alt + } + + panic(e) + } + if D.requiresFullContext && p.predictionMode != PredictionModeSLL { + // IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error) + conflictingAlts := D.configs.GetConflictingAlts() + if D.predicates != nil { + if ParserATNSimulatorDebug { + fmt.Println("DFA state has preds in DFA sim LL failover") + } + conflictIndex := input.Index() + if conflictIndex != startIndex { + input.Seek(startIndex) + } + conflictingAlts = p.evalSemanticContext(D.predicates, outerContext, true) + if conflictingAlts.length() == 1 { + if ParserATNSimulatorDebug { + fmt.Println("Full LL avoided") + } + return conflictingAlts.minValue() + } + if conflictIndex != startIndex { + // restore the index so Reporting the fallback to full + // context occurs with the index at the correct spot + input.Seek(conflictIndex) + } + } + if ParserATNSimulatorDFADebug { + fmt.Println("ctx sensitive state " + outerContext.String(nil, nil) + " in " + D.String()) + } + fullCtx := true + s0Closure := p.computeStartState(dfa.atnStartState, outerContext, fullCtx) + p.ReportAttemptingFullContext(dfa, conflictingAlts, D.configs, startIndex, input.Index()) + alt := p.execATNWithFullContext(dfa, D, s0Closure, input, startIndex, outerContext) + return alt + } + if D.isAcceptState { + if D.predicates == nil { + return D.prediction + } + stopIndex := input.Index() + input.Seek(startIndex) + alts := p.evalSemanticContext(D.predicates, outerContext, true) + + switch alts.length() { + case 0: + panic(p.noViableAlt(input, outerContext, D.configs, startIndex)) + case 1: + return alts.minValue() + default: + // Report ambiguity after predicate evaluation to make sure the correct set of ambig alts is Reported. + p.ReportAmbiguity(dfa, D, startIndex, stopIndex, false, alts, D.configs) + return alts.minValue() + } + } + previousD = D + + if t != TokenEOF { + input.Consume() + t = input.LA(1) + } + } +} + +// Get an existing target state for an edge in the DFA. If the target state +// for the edge has not yet been computed or is otherwise not available, +// p method returns {@code nil}. +// +// @param previousD The current DFA state +// @param t The next input symbol +// @return The existing target DFA state for the given input symbol +// {@code t}, or {@code nil} if the target state for p edge is not +// already cached + +func (p *ParserATNSimulator) getExistingTargetState(previousD *DFAState, t int) *DFAState { + if t+1 < 0 { + return nil + } + + p.atn.edgeMu.RLock() + defer p.atn.edgeMu.RUnlock() + edges := previousD.getEdges() + if edges == nil || t+1 >= len(edges) { + return nil + } + return previousD.getIthEdge(t + 1) +} + +// Compute a target state for an edge in the DFA, and attempt to add the +// computed state and corresponding edge to the DFA. +// +// @param dfa The DFA +// @param previousD The current DFA state +// @param t The next input symbol +// +// @return The computed target DFA state for the given input symbol +// {@code t}. If {@code t} does not lead to a valid DFA state, p method +// returns {@link //ERROR}. + +func (p *ParserATNSimulator) computeTargetState(dfa *DFA, previousD *DFAState, t int) *DFAState { + reach := p.computeReachSet(previousD.configs, t, false) + + if reach == nil { + p.addDFAEdge(dfa, previousD, t, ATNSimulatorError) + return ATNSimulatorError + } + // create Newtarget state we'll add to DFA after it's complete + D := NewDFAState(-1, reach) + + predictedAlt := p.getUniqueAlt(reach) + + if ParserATNSimulatorDebug { + altSubSets := PredictionModegetConflictingAltSubsets(reach) + fmt.Println("SLL altSubSets=" + fmt.Sprint(altSubSets) + + ", previous=" + previousD.configs.String() + + ", configs=" + reach.String() + + ", predict=" + strconv.Itoa(predictedAlt) + + ", allSubsetsConflict=" + + fmt.Sprint(PredictionModeallSubsetsConflict(altSubSets)) + + ", conflictingAlts=" + p.getConflictingAlts(reach).String()) + } + if predictedAlt != ATNInvalidAltNumber { + // NO CONFLICT, UNIQUELY PREDICTED ALT + D.isAcceptState = true + D.configs.SetUniqueAlt(predictedAlt) + D.setPrediction(predictedAlt) + } else if PredictionModehasSLLConflictTerminatingPrediction(p.predictionMode, reach) { + // MORE THAN ONE VIABLE ALTERNATIVE + D.configs.SetConflictingAlts(p.getConflictingAlts(reach)) + D.requiresFullContext = true + // in SLL-only mode, we will stop at p state and return the minimum alt + D.isAcceptState = true + D.setPrediction(D.configs.GetConflictingAlts().minValue()) + } + if D.isAcceptState && D.configs.HasSemanticContext() { + p.predicateDFAState(D, p.atn.getDecisionState(dfa.decision)) + if D.predicates != nil { + D.setPrediction(ATNInvalidAltNumber) + } + } + // all adds to dfa are done after we've created full D state + D = p.addDFAEdge(dfa, previousD, t, D) + return D +} + +func (p *ParserATNSimulator) predicateDFAState(dfaState *DFAState, decisionState DecisionState) { + // We need to test all predicates, even in DFA states that + // uniquely predict alternative. + nalts := len(decisionState.GetTransitions()) + // Update DFA so reach becomes accept state with (predicate,alt) + // pairs if preds found for conflicting alts + altsToCollectPredsFrom := p.getConflictingAltsOrUniqueAlt(dfaState.configs) + altToPred := p.getPredsForAmbigAlts(altsToCollectPredsFrom, dfaState.configs, nalts) + if altToPred != nil { + dfaState.predicates = p.getPredicatePredictions(altsToCollectPredsFrom, altToPred) + dfaState.setPrediction(ATNInvalidAltNumber) // make sure we use preds + } else { + // There are preds in configs but they might go away + // when OR'd together like {p}? || NONE == NONE. If neither + // alt has preds, resolve to min alt + dfaState.setPrediction(altsToCollectPredsFrom.minValue()) + } +} + +// comes back with reach.uniqueAlt set to a valid alt +func (p *ParserATNSimulator) execATNWithFullContext(dfa *DFA, D *DFAState, s0 ATNConfigSet, input TokenStream, startIndex int, outerContext ParserRuleContext) int { + + if ParserATNSimulatorDebug || ParserATNSimulatorListATNDecisions { + fmt.Println("execATNWithFullContext " + s0.String()) + } + + fullCtx := true + foundExactAmbig := false + var reach ATNConfigSet + previous := s0 + input.Seek(startIndex) + t := input.LA(1) + predictedAlt := -1 + + for { // for more work + reach = p.computeReachSet(previous, t, fullCtx) + if reach == nil { + // if any configs in previous dipped into outer context, that + // means that input up to t actually finished entry rule + // at least for LL decision. Full LL doesn't dip into outer + // so don't need special case. + // We will get an error no matter what so delay until after + // decision better error message. Also, no reachable target + // ATN states in SLL implies LL will also get nowhere. + // If conflict in states that dip out, choose min since we + // will get error no matter what. + e := p.noViableAlt(input, outerContext, previous, startIndex) + input.Seek(startIndex) + alt := p.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext) + if alt != ATNInvalidAltNumber { + return alt + } + + panic(e) + } + altSubSets := PredictionModegetConflictingAltSubsets(reach) + if ParserATNSimulatorDebug { + fmt.Println("LL altSubSets=" + fmt.Sprint(altSubSets) + ", predict=" + + strconv.Itoa(PredictionModegetUniqueAlt(altSubSets)) + ", resolvesToJustOneViableAlt=" + + fmt.Sprint(PredictionModeresolvesToJustOneViableAlt(altSubSets))) + } + reach.SetUniqueAlt(p.getUniqueAlt(reach)) + // unique prediction? + if reach.GetUniqueAlt() != ATNInvalidAltNumber { + predictedAlt = reach.GetUniqueAlt() + break + } + if p.predictionMode != PredictionModeLLExactAmbigDetection { + predictedAlt = PredictionModeresolvesToJustOneViableAlt(altSubSets) + if predictedAlt != ATNInvalidAltNumber { + break + } + } else { + // In exact ambiguity mode, we never try to terminate early. + // Just keeps scarfing until we know what the conflict is + if PredictionModeallSubsetsConflict(altSubSets) && PredictionModeallSubsetsEqual(altSubSets) { + foundExactAmbig = true + predictedAlt = PredictionModegetSingleViableAlt(altSubSets) + break + } + // else there are multiple non-conflicting subsets or + // we're not sure what the ambiguity is yet. + // So, keep going. + } + previous = reach + if t != TokenEOF { + input.Consume() + t = input.LA(1) + } + } + // If the configuration set uniquely predicts an alternative, + // without conflict, then we know that it's a full LL decision + // not SLL. + if reach.GetUniqueAlt() != ATNInvalidAltNumber { + p.ReportContextSensitivity(dfa, predictedAlt, reach, startIndex, input.Index()) + return predictedAlt + } + // We do not check predicates here because we have checked them + // on-the-fly when doing full context prediction. + + // + // In non-exact ambiguity detection mode, we might actually be able to + // detect an exact ambiguity, but I'm not going to spend the cycles + // needed to check. We only emit ambiguity warnings in exact ambiguity + // mode. + // + // For example, we might know that we have conflicting configurations. + // But, that does not mean that there is no way forward without a + // conflict. It's possible to have nonconflicting alt subsets as in: + + // altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}] + + // from + // + // [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]), + // (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])] + // + // In p case, (17,1,[5 $]) indicates there is some next sequence that + // would resolve p without conflict to alternative 1. Any other viable + // next sequence, however, is associated with a conflict. We stop + // looking for input because no amount of further lookahead will alter + // the fact that we should predict alternative 1. We just can't say for + // sure that there is an ambiguity without looking further. + + p.ReportAmbiguity(dfa, D, startIndex, input.Index(), foundExactAmbig, reach.Alts(), reach) + + return predictedAlt +} + +func (p *ParserATNSimulator) computeReachSet(closure ATNConfigSet, t int, fullCtx bool) ATNConfigSet { + if ParserATNSimulatorDebug { + fmt.Println("in computeReachSet, starting closure: " + closure.String()) + } + if p.mergeCache == nil { + p.mergeCache = NewDoubleDict() + } + intermediate := NewBaseATNConfigSet(fullCtx) + + // Configurations already in a rule stop state indicate reaching the end + // of the decision rule (local context) or end of the start rule (full + // context). Once reached, these configurations are never updated by a + // closure operation, so they are handled separately for the performance + // advantage of having a smaller intermediate set when calling closure. + // + // For full-context reach operations, separate handling is required to + // ensure that the alternative Matching the longest overall sequence is + // chosen when multiple such configurations can Match the input. + + var skippedStopStates []*BaseATNConfig + + // First figure out where we can reach on input t + for _, c := range closure.GetItems() { + if ParserATNSimulatorDebug { + fmt.Println("testing " + p.GetTokenName(t) + " at " + c.String()) + } + + if _, ok := c.GetState().(*RuleStopState); ok { + if fullCtx || t == TokenEOF { + skippedStopStates = append(skippedStopStates, c.(*BaseATNConfig)) + if ParserATNSimulatorDebug { + fmt.Println("added " + c.String() + " to SkippedStopStates") + } + } + continue + } + + for _, trans := range c.GetState().GetTransitions() { + target := p.getReachableTarget(trans, t) + if target != nil { + cfg := NewBaseATNConfig4(c, target) + intermediate.Add(cfg, p.mergeCache) + if ParserATNSimulatorDebug { + fmt.Println("added " + cfg.String() + " to intermediate") + } + } + } + } + + // Now figure out where the reach operation can take us... + var reach ATNConfigSet + + // This block optimizes the reach operation for intermediate sets which + // trivially indicate a termination state for the overall + // AdaptivePredict operation. + // + // The conditions assume that intermediate + // contains all configurations relevant to the reach set, but p + // condition is not true when one or more configurations have been + // withheld in SkippedStopStates, or when the current symbol is EOF. + // + if skippedStopStates == nil && t != TokenEOF { + if len(intermediate.configs) == 1 { + // Don't pursue the closure if there is just one state. + // It can only have one alternative just add to result + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = intermediate + } else if p.getUniqueAlt(intermediate) != ATNInvalidAltNumber { + // Also don't pursue the closure if there is unique alternative + // among the configurations. + reach = intermediate + } + } + // If the reach set could not be trivially determined, perform a closure + // operation on the intermediate set to compute its initial value. + // + if reach == nil { + reach = NewBaseATNConfigSet(fullCtx) + closureBusy := NewJStore[ATNConfig, Comparator[ATNConfig]](&ObjEqComparator[ATNConfig]{}) + treatEOFAsEpsilon := t == TokenEOF + amount := len(intermediate.configs) + for k := 0; k < amount; k++ { + p.closure(intermediate.configs[k], reach, closureBusy, false, fullCtx, treatEOFAsEpsilon) + } + } + if t == TokenEOF { + // After consuming EOF no additional input is possible, so we are + // only interested in configurations which reached the end of the + // decision rule (local context) or end of the start rule (full + // context). Update reach to contain only these configurations. This + // handles both explicit EOF transitions in the grammar and implicit + // EOF transitions following the end of the decision or start rule. + // + // When reach==intermediate, no closure operation was performed. In + // p case, removeAllConfigsNotInRuleStopState needs to check for + // reachable rule stop states as well as configurations already in + // a rule stop state. + // + // This is handled before the configurations in SkippedStopStates, + // because any configurations potentially added from that list are + // already guaranteed to meet p condition whether or not it's + // required. + // + reach = p.removeAllConfigsNotInRuleStopState(reach, reach == intermediate) + } + // If SkippedStopStates!=nil, then it contains at least one + // configuration. For full-context reach operations, these + // configurations reached the end of the start rule, in which case we + // only add them back to reach if no configuration during the current + // closure operation reached such a state. This ensures AdaptivePredict + // chooses an alternative Matching the longest overall sequence when + // multiple alternatives are viable. + // + if skippedStopStates != nil && ((!fullCtx) || (!PredictionModehasConfigInRuleStopState(reach))) { + for l := 0; l < len(skippedStopStates); l++ { + reach.Add(skippedStopStates[l], p.mergeCache) + } + } + if len(reach.GetItems()) == 0 { + return nil + } + + return reach +} + +// Return a configuration set containing only the configurations from +// {@code configs} which are in a {@link RuleStopState}. If all +// configurations in {@code configs} are already in a rule stop state, p +// method simply returns {@code configs}. +// +//

When {@code lookToEndOfRule} is true, p method uses +// {@link ATN//NextTokens} for each configuration in {@code configs} which is +// not already in a rule stop state to see if a rule stop state is reachable +// from the configuration via epsilon-only transitions.

+// +// @param configs the configuration set to update +// @param lookToEndOfRule when true, p method checks for rule stop states +// reachable by epsilon-only transitions from each configuration in +// {@code configs}. +// +// @return {@code configs} if all configurations in {@code configs} are in a +// rule stop state, otherwise return a Newconfiguration set containing only +// the configurations from {@code configs} which are in a rule stop state +func (p *ParserATNSimulator) removeAllConfigsNotInRuleStopState(configs ATNConfigSet, lookToEndOfRule bool) ATNConfigSet { + if PredictionModeallConfigsInRuleStopStates(configs) { + return configs + } + result := NewBaseATNConfigSet(configs.FullContext()) + for _, config := range configs.GetItems() { + if _, ok := config.GetState().(*RuleStopState); ok { + result.Add(config, p.mergeCache) + continue + } + if lookToEndOfRule && config.GetState().GetEpsilonOnlyTransitions() { + NextTokens := p.atn.NextTokens(config.GetState(), nil) + if NextTokens.contains(TokenEpsilon) { + endOfRuleState := p.atn.ruleToStopState[config.GetState().GetRuleIndex()] + result.Add(NewBaseATNConfig4(config, endOfRuleState), p.mergeCache) + } + } + } + return result +} + +func (p *ParserATNSimulator) computeStartState(a ATNState, ctx RuleContext, fullCtx bool) ATNConfigSet { + // always at least the implicit call to start rule + initialContext := predictionContextFromRuleContext(p.atn, ctx) + configs := NewBaseATNConfigSet(fullCtx) + for i := 0; i < len(a.GetTransitions()); i++ { + target := a.GetTransitions()[i].getTarget() + c := NewBaseATNConfig6(target, i+1, initialContext) + closureBusy := NewJStore[ATNConfig, Comparator[ATNConfig]](&BaseATNConfigComparator[ATNConfig]{}) + p.closure(c, configs, closureBusy, true, fullCtx, false) + } + return configs +} + +// This method transforms the start state computed by +// {@link //computeStartState} to the special start state used by a +// precedence DFA for a particular precedence value. The transformation +// process applies the following changes to the start state's configuration +// set. +// +//
    +//
  1. Evaluate the precedence predicates for each configuration using +// {@link SemanticContext//evalPrecedence}.
  2. +//
  3. Remove all configurations which predict an alternative greater than +// 1, for which another configuration that predicts alternative 1 is in the +// same ATN state with the same prediction context. This transformation is +// valid for the following reasons: +//
      +//
    • The closure block cannot contain any epsilon transitions which bypass +// the body of the closure, so all states reachable via alternative 1 are +// part of the precedence alternatives of the transformed left-recursive +// rule.
    • +//
    • The "primary" portion of a left recursive rule cannot contain an +// epsilon transition, so the only way an alternative other than 1 can exist +// in a state that is also reachable via alternative 1 is by nesting calls +// to the left-recursive rule, with the outer calls not being at the +// preferred precedence level.
    • +//
    +//
  4. +//
+// +//

+// The prediction context must be considered by p filter to address +// situations like the following. +//

+// +//
+// grammar TA
+// prog: statement* EOF
+// statement: letterA | statement letterA 'b'
+// letterA: 'a'
+// 
+//
+//

+// If the above grammar, the ATN state immediately before the token +// reference {@code 'a'} in {@code letterA} is reachable from the left edge +// of both the primary and closure blocks of the left-recursive rule +// {@code statement}. The prediction context associated with each of these +// configurations distinguishes between them, and prevents the alternative +// which stepped out to {@code prog} (and then back in to {@code statement} +// from being eliminated by the filter. +//

+// +// @param configs The configuration set computed by +// {@link //computeStartState} as the start state for the DFA. +// @return The transformed configuration set representing the start state +// for a precedence DFA at a particular precedence level (determined by +// calling {@link Parser//getPrecedence}). +func (p *ParserATNSimulator) applyPrecedenceFilter(configs ATNConfigSet) ATNConfigSet { + + statesFromAlt1 := make(map[int]PredictionContext) + configSet := NewBaseATNConfigSet(configs.FullContext()) + + for _, config := range configs.GetItems() { + // handle alt 1 first + if config.GetAlt() != 1 { + continue + } + updatedContext := config.GetSemanticContext().evalPrecedence(p.parser, p.outerContext) + if updatedContext == nil { + // the configuration was eliminated + continue + } + statesFromAlt1[config.GetState().GetStateNumber()] = config.GetContext() + if updatedContext != config.GetSemanticContext() { + configSet.Add(NewBaseATNConfig2(config, updatedContext), p.mergeCache) + } else { + configSet.Add(config, p.mergeCache) + } + } + for _, config := range configs.GetItems() { + + if config.GetAlt() == 1 { + // already handled + continue + } + // In the future, p elimination step could be updated to also + // filter the prediction context for alternatives predicting alt>1 + // (basically a graph subtraction algorithm). + if !config.getPrecedenceFilterSuppressed() { + context := statesFromAlt1[config.GetState().GetStateNumber()] + if context != nil && context.Equals(config.GetContext()) { + // eliminated + continue + } + } + configSet.Add(config, p.mergeCache) + } + return configSet +} + +func (p *ParserATNSimulator) getReachableTarget(trans Transition, ttype int) ATNState { + if trans.Matches(ttype, 0, p.atn.maxTokenType) { + return trans.getTarget() + } + + return nil +} + +func (p *ParserATNSimulator) getPredsForAmbigAlts(ambigAlts *BitSet, configs ATNConfigSet, nalts int) []SemanticContext { + + altToPred := make([]SemanticContext, nalts+1) + for _, c := range configs.GetItems() { + if ambigAlts.contains(c.GetAlt()) { + altToPred[c.GetAlt()] = SemanticContextorContext(altToPred[c.GetAlt()], c.GetSemanticContext()) + } + } + nPredAlts := 0 + for i := 1; i <= nalts; i++ { + pred := altToPred[i] + if pred == nil { + altToPred[i] = SemanticContextNone + } else if pred != SemanticContextNone { + nPredAlts++ + } + } + // nonambig alts are nil in altToPred + if nPredAlts == 0 { + altToPred = nil + } + if ParserATNSimulatorDebug { + fmt.Println("getPredsForAmbigAlts result " + fmt.Sprint(altToPred)) + } + return altToPred +} + +func (p *ParserATNSimulator) getPredicatePredictions(ambigAlts *BitSet, altToPred []SemanticContext) []*PredPrediction { + pairs := make([]*PredPrediction, 0) + containsPredicate := false + for i := 1; i < len(altToPred); i++ { + pred := altToPred[i] + // unpredicated is indicated by SemanticContextNONE + if ambigAlts != nil && ambigAlts.contains(i) { + pairs = append(pairs, NewPredPrediction(pred, i)) + } + if pred != SemanticContextNone { + containsPredicate = true + } + } + if !containsPredicate { + return nil + } + return pairs +} + +// This method is used to improve the localization of error messages by +// choosing an alternative rather than panicing a +// {@link NoViableAltException} in particular prediction scenarios where the +// {@link //ERROR} state was reached during ATN simulation. +// +//

+// The default implementation of p method uses the following +// algorithm to identify an ATN configuration which successfully parsed the +// decision entry rule. Choosing such an alternative ensures that the +// {@link ParserRuleContext} returned by the calling rule will be complete +// and valid, and the syntax error will be Reported later at a more +// localized location.

+// +//
    +//
  • If a syntactically valid path or paths reach the end of the decision rule and +// they are semantically valid if predicated, return the min associated alt.
  • +//
  • Else, if a semantically invalid but syntactically valid path exist +// or paths exist, return the minimum associated alt. +//
  • +//
  • Otherwise, return {@link ATN//INVALID_ALT_NUMBER}.
  • +//
+// +//

+// In some scenarios, the algorithm described above could predict an +// alternative which will result in a {@link FailedPredicateException} in +// the parser. Specifically, p could occur if the only configuration +// capable of successfully parsing to the end of the decision rule is +// blocked by a semantic predicate. By choosing p alternative within +// {@link //AdaptivePredict} instead of panicing a +// {@link NoViableAltException}, the resulting +// {@link FailedPredicateException} in the parser will identify the specific +// predicate which is preventing the parser from successfully parsing the +// decision rule, which helps developers identify and correct logic errors +// in semantic predicates. +//

+// +// @param configs The ATN configurations which were valid immediately before +// the {@link //ERROR} state was reached +// @param outerContext The is the \gamma_0 initial parser context from the paper +// or the parser stack at the instant before prediction commences. +// +// @return The value to return from {@link //AdaptivePredict}, or +// {@link ATN//INVALID_ALT_NUMBER} if a suitable alternative was not +// identified and {@link //AdaptivePredict} should Report an error instead. +func (p *ParserATNSimulator) getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(configs ATNConfigSet, outerContext ParserRuleContext) int { + cfgs := p.splitAccordingToSemanticValidity(configs, outerContext) + semValidConfigs := cfgs[0] + semInvalidConfigs := cfgs[1] + alt := p.GetAltThatFinishedDecisionEntryRule(semValidConfigs) + if alt != ATNInvalidAltNumber { // semantically/syntactically viable path exists + return alt + } + // Is there a syntactically valid path with a failed pred? + if len(semInvalidConfigs.GetItems()) > 0 { + alt = p.GetAltThatFinishedDecisionEntryRule(semInvalidConfigs) + if alt != ATNInvalidAltNumber { // syntactically viable path exists + return alt + } + } + return ATNInvalidAltNumber +} + +func (p *ParserATNSimulator) GetAltThatFinishedDecisionEntryRule(configs ATNConfigSet) int { + alts := NewIntervalSet() + + for _, c := range configs.GetItems() { + _, ok := c.GetState().(*RuleStopState) + + if c.GetReachesIntoOuterContext() > 0 || (ok && c.GetContext().hasEmptyPath()) { + alts.addOne(c.GetAlt()) + } + } + if alts.length() == 0 { + return ATNInvalidAltNumber + } + + return alts.first() +} + +// Walk the list of configurations and split them according to +// those that have preds evaluating to true/false. If no pred, assume +// true pred and include in succeeded set. Returns Pair of sets. +// +// Create a NewSet so as not to alter the incoming parameter. +// +// Assumption: the input stream has been restored to the starting point +// prediction, which is where predicates need to evaluate. + +type ATNConfigSetPair struct { + item0, item1 ATNConfigSet +} + +func (p *ParserATNSimulator) splitAccordingToSemanticValidity(configs ATNConfigSet, outerContext ParserRuleContext) []ATNConfigSet { + succeeded := NewBaseATNConfigSet(configs.FullContext()) + failed := NewBaseATNConfigSet(configs.FullContext()) + + for _, c := range configs.GetItems() { + if c.GetSemanticContext() != SemanticContextNone { + predicateEvaluationResult := c.GetSemanticContext().evaluate(p.parser, outerContext) + if predicateEvaluationResult { + succeeded.Add(c, nil) + } else { + failed.Add(c, nil) + } + } else { + succeeded.Add(c, nil) + } + } + return []ATNConfigSet{succeeded, failed} +} + +// Look through a list of predicate/alt pairs, returning alts for the +// +// pairs that win. A {@code NONE} predicate indicates an alt containing an +// unpredicated config which behaves as "always true." If !complete +// then we stop at the first predicate that evaluates to true. This +// includes pairs with nil predicates. +func (p *ParserATNSimulator) evalSemanticContext(predPredictions []*PredPrediction, outerContext ParserRuleContext, complete bool) *BitSet { + predictions := NewBitSet() + for i := 0; i < len(predPredictions); i++ { + pair := predPredictions[i] + if pair.pred == SemanticContextNone { + predictions.add(pair.alt) + if !complete { + break + } + continue + } + + predicateEvaluationResult := pair.pred.evaluate(p.parser, outerContext) + if ParserATNSimulatorDebug || ParserATNSimulatorDFADebug { + fmt.Println("eval pred " + pair.String() + "=" + fmt.Sprint(predicateEvaluationResult)) + } + if predicateEvaluationResult { + if ParserATNSimulatorDebug || ParserATNSimulatorDFADebug { + fmt.Println("PREDICT " + fmt.Sprint(pair.alt)) + } + predictions.add(pair.alt) + if !complete { + break + } + } + } + return predictions +} + +func (p *ParserATNSimulator) closure(config ATNConfig, configs ATNConfigSet, closureBusy *JStore[ATNConfig, Comparator[ATNConfig]], collectPredicates, fullCtx, treatEOFAsEpsilon bool) { + initialDepth := 0 + p.closureCheckingStopState(config, configs, closureBusy, collectPredicates, + fullCtx, initialDepth, treatEOFAsEpsilon) +} + +func (p *ParserATNSimulator) closureCheckingStopState(config ATNConfig, configs ATNConfigSet, closureBusy *JStore[ATNConfig, Comparator[ATNConfig]], collectPredicates, fullCtx bool, depth int, treatEOFAsEpsilon bool) { + if ParserATNSimulatorDebug { + fmt.Println("closure(" + config.String() + ")") + fmt.Println("configs(" + configs.String() + ")") + if config.GetReachesIntoOuterContext() > 50 { + panic("problem") + } + } + + if _, ok := config.GetState().(*RuleStopState); ok { + // We hit rule end. If we have context info, use it + // run thru all possible stack tops in ctx + if !config.GetContext().isEmpty() { + for i := 0; i < config.GetContext().length(); i++ { + if config.GetContext().getReturnState(i) == BasePredictionContextEmptyReturnState { + if fullCtx { + configs.Add(NewBaseATNConfig1(config, config.GetState(), BasePredictionContextEMPTY), p.mergeCache) + continue + } else { + // we have no context info, just chase follow links (if greedy) + if ParserATNSimulatorDebug { + fmt.Println("FALLING off rule " + p.getRuleName(config.GetState().GetRuleIndex())) + } + p.closureWork(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEOFAsEpsilon) + } + continue + } + returnState := p.atn.states[config.GetContext().getReturnState(i)] + newContext := config.GetContext().GetParent(i) // "pop" return state + + c := NewBaseATNConfig5(returnState, config.GetAlt(), newContext, config.GetSemanticContext()) + // While we have context to pop back from, we may have + // gotten that context AFTER having falling off a rule. + // Make sure we track that we are now out of context. + c.SetReachesIntoOuterContext(config.GetReachesIntoOuterContext()) + p.closureCheckingStopState(c, configs, closureBusy, collectPredicates, fullCtx, depth-1, treatEOFAsEpsilon) + } + return + } else if fullCtx { + // reached end of start rule + configs.Add(config, p.mergeCache) + return + } else { + // else if we have no context info, just chase follow links (if greedy) + if ParserATNSimulatorDebug { + fmt.Println("FALLING off rule " + p.getRuleName(config.GetState().GetRuleIndex())) + } + } + } + p.closureWork(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEOFAsEpsilon) +} + +// Do the actual work of walking epsilon edges// +func (p *ParserATNSimulator) closureWork(config ATNConfig, configs ATNConfigSet, closureBusy *JStore[ATNConfig, Comparator[ATNConfig]], collectPredicates, fullCtx bool, depth int, treatEOFAsEpsilon bool) { + state := config.GetState() + // optimization + if !state.GetEpsilonOnlyTransitions() { + configs.Add(config, p.mergeCache) + // make sure to not return here, because EOF transitions can act as + // both epsilon transitions and non-epsilon transitions. + } + for i := 0; i < len(state.GetTransitions()); i++ { + if i == 0 && p.canDropLoopEntryEdgeInLeftRecursiveRule(config) { + continue + } + + t := state.GetTransitions()[i] + _, ok := t.(*ActionTransition) + continueCollecting := collectPredicates && !ok + c := p.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEOFAsEpsilon) + if ci, ok := c.(*BaseATNConfig); ok && ci != nil { + newDepth := depth + + if _, ok := config.GetState().(*RuleStopState); ok { + // target fell off end of rule mark resulting c as having dipped into outer context + // We can't get here if incoming config was rule stop and we had context + // track how far we dip into outer context. Might + // come in handy and we avoid evaluating context dependent + // preds if p is > 0. + + if p.dfa != nil && p.dfa.getPrecedenceDfa() { + if t.(*EpsilonTransition).outermostPrecedenceReturn == p.dfa.atnStartState.GetRuleIndex() { + c.setPrecedenceFilterSuppressed(true) + } + } + + c.SetReachesIntoOuterContext(c.GetReachesIntoOuterContext() + 1) + + _, present := closureBusy.Put(c) + if present { + // avoid infinite recursion for right-recursive rules + continue + } + + configs.SetDipsIntoOuterContext(true) // TODO: can remove? only care when we add to set per middle of p method + newDepth-- + if ParserATNSimulatorDebug { + fmt.Println("dips into outer ctx: " + c.String()) + } + } else { + + if !t.getIsEpsilon() { + _, present := closureBusy.Put(c) + if present { + // avoid infinite recursion for EOF* and EOF+ + continue + } + } + if _, ok := t.(*RuleTransition); ok { + // latch when newDepth goes negative - once we step out of the entry context we can't return + if newDepth >= 0 { + newDepth++ + } + } + } + p.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEOFAsEpsilon) + } + } +} + +func (p *ParserATNSimulator) canDropLoopEntryEdgeInLeftRecursiveRule(config ATNConfig) bool { + if TurnOffLRLoopEntryBranchOpt { + return false + } + + _p := config.GetState() + + // First check to see if we are in StarLoopEntryState generated during + // left-recursion elimination. For efficiency, also check if + // the context has an empty stack case. If so, it would mean + // global FOLLOW so we can't perform optimization + if _p.GetStateType() != ATNStateStarLoopEntry { + return false + } + startLoop, ok := _p.(*StarLoopEntryState) + if !ok { + return false + } + if !startLoop.precedenceRuleDecision || + config.GetContext().isEmpty() || + config.GetContext().hasEmptyPath() { + return false + } + + // Require all return states to return back to the same rule + // that p is in. + numCtxs := config.GetContext().length() + for i := 0; i < numCtxs; i++ { + returnState := p.atn.states[config.GetContext().getReturnState(i)] + if returnState.GetRuleIndex() != _p.GetRuleIndex() { + return false + } + } + x := _p.GetTransitions()[0].getTarget() + decisionStartState := x.(BlockStartState) + blockEndStateNum := decisionStartState.getEndState().stateNumber + blockEndState := p.atn.states[blockEndStateNum].(*BlockEndState) + + // Verify that the top of each stack context leads to loop entry/exit + // state through epsilon edges and w/o leaving rule. + + for i := 0; i < numCtxs; i++ { // for each stack context + returnStateNumber := config.GetContext().getReturnState(i) + returnState := p.atn.states[returnStateNumber] + + // all states must have single outgoing epsilon edge + if len(returnState.GetTransitions()) != 1 || !returnState.GetTransitions()[0].getIsEpsilon() { + return false + } + + // Look for prefix op case like 'not expr', (' type ')' expr + returnStateTarget := returnState.GetTransitions()[0].getTarget() + if returnState.GetStateType() == ATNStateBlockEnd && returnStateTarget == _p { + continue + } + + // Look for 'expr op expr' or case where expr's return state is block end + // of (...)* internal block; the block end points to loop back + // which points to p but we don't need to check that + if returnState == blockEndState { + continue + } + + // Look for ternary expr ? expr : expr. The return state points at block end, + // which points at loop entry state + if returnStateTarget == blockEndState { + continue + } + + // Look for complex prefix 'between expr and expr' case where 2nd expr's + // return state points at block end state of (...)* internal block + if returnStateTarget.GetStateType() == ATNStateBlockEnd && + len(returnStateTarget.GetTransitions()) == 1 && + returnStateTarget.GetTransitions()[0].getIsEpsilon() && + returnStateTarget.GetTransitions()[0].getTarget() == _p { + continue + } + + // anything else ain't conforming + return false + } + + return true +} + +func (p *ParserATNSimulator) getRuleName(index int) string { + if p.parser != nil && index >= 0 { + return p.parser.GetRuleNames()[index] + } + var sb strings.Builder + sb.Grow(32) + + sb.WriteString("') + return sb.String() +} + +func (p *ParserATNSimulator) getEpsilonTarget(config ATNConfig, t Transition, collectPredicates, inContext, fullCtx, treatEOFAsEpsilon bool) ATNConfig { + + switch t.getSerializationType() { + case TransitionRULE: + return p.ruleTransition(config, t.(*RuleTransition)) + case TransitionPRECEDENCE: + return p.precedenceTransition(config, t.(*PrecedencePredicateTransition), collectPredicates, inContext, fullCtx) + case TransitionPREDICATE: + return p.predTransition(config, t.(*PredicateTransition), collectPredicates, inContext, fullCtx) + case TransitionACTION: + return p.actionTransition(config, t.(*ActionTransition)) + case TransitionEPSILON: + return NewBaseATNConfig4(config, t.getTarget()) + case TransitionATOM, TransitionRANGE, TransitionSET: + // EOF transitions act like epsilon transitions after the first EOF + // transition is traversed + if treatEOFAsEpsilon { + if t.Matches(TokenEOF, 0, 1) { + return NewBaseATNConfig4(config, t.getTarget()) + } + } + return nil + default: + return nil + } +} + +func (p *ParserATNSimulator) actionTransition(config ATNConfig, t *ActionTransition) *BaseATNConfig { + if ParserATNSimulatorDebug { + fmt.Println("ACTION edge " + strconv.Itoa(t.ruleIndex) + ":" + strconv.Itoa(t.actionIndex)) + } + return NewBaseATNConfig4(config, t.getTarget()) +} + +func (p *ParserATNSimulator) precedenceTransition(config ATNConfig, + pt *PrecedencePredicateTransition, collectPredicates, inContext, fullCtx bool) *BaseATNConfig { + + if ParserATNSimulatorDebug { + fmt.Println("PRED (collectPredicates=" + fmt.Sprint(collectPredicates) + ") " + + strconv.Itoa(pt.precedence) + ">=_p, ctx dependent=true") + if p.parser != nil { + fmt.Println("context surrounding pred is " + fmt.Sprint(p.parser.GetRuleInvocationStack(nil))) + } + } + var c *BaseATNConfig + if collectPredicates && inContext { + if fullCtx { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. + currentPosition := p.input.Index() + p.input.Seek(p.startIndex) + predSucceeds := pt.getPredicate().evaluate(p.parser, p.outerContext) + p.input.Seek(currentPosition) + if predSucceeds { + c = NewBaseATNConfig4(config, pt.getTarget()) // no pred context + } + } else { + newSemCtx := SemanticContextandContext(config.GetSemanticContext(), pt.getPredicate()) + c = NewBaseATNConfig3(config, pt.getTarget(), newSemCtx) + } + } else { + c = NewBaseATNConfig4(config, pt.getTarget()) + } + if ParserATNSimulatorDebug { + fmt.Println("config from pred transition=" + c.String()) + } + return c +} + +func (p *ParserATNSimulator) predTransition(config ATNConfig, pt *PredicateTransition, collectPredicates, inContext, fullCtx bool) *BaseATNConfig { + + if ParserATNSimulatorDebug { + fmt.Println("PRED (collectPredicates=" + fmt.Sprint(collectPredicates) + ") " + strconv.Itoa(pt.ruleIndex) + + ":" + strconv.Itoa(pt.predIndex) + ", ctx dependent=" + fmt.Sprint(pt.isCtxDependent)) + if p.parser != nil { + fmt.Println("context surrounding pred is " + fmt.Sprint(p.parser.GetRuleInvocationStack(nil))) + } + } + var c *BaseATNConfig + if collectPredicates && (!pt.isCtxDependent || inContext) { + if fullCtx { + // In full context mode, we can evaluate predicates on-the-fly + // during closure, which dramatically reduces the size of + // the config sets. It also obviates the need to test predicates + // later during conflict resolution. + currentPosition := p.input.Index() + p.input.Seek(p.startIndex) + predSucceeds := pt.getPredicate().evaluate(p.parser, p.outerContext) + p.input.Seek(currentPosition) + if predSucceeds { + c = NewBaseATNConfig4(config, pt.getTarget()) // no pred context + } + } else { + newSemCtx := SemanticContextandContext(config.GetSemanticContext(), pt.getPredicate()) + c = NewBaseATNConfig3(config, pt.getTarget(), newSemCtx) + } + } else { + c = NewBaseATNConfig4(config, pt.getTarget()) + } + if ParserATNSimulatorDebug { + fmt.Println("config from pred transition=" + c.String()) + } + return c +} + +func (p *ParserATNSimulator) ruleTransition(config ATNConfig, t *RuleTransition) *BaseATNConfig { + if ParserATNSimulatorDebug { + fmt.Println("CALL rule " + p.getRuleName(t.getTarget().GetRuleIndex()) + ", ctx=" + config.GetContext().String()) + } + returnState := t.followState + newContext := SingletonBasePredictionContextCreate(config.GetContext(), returnState.GetStateNumber()) + return NewBaseATNConfig1(config, t.getTarget(), newContext) +} + +func (p *ParserATNSimulator) getConflictingAlts(configs ATNConfigSet) *BitSet { + altsets := PredictionModegetConflictingAltSubsets(configs) + return PredictionModeGetAlts(altsets) +} + +// Sam pointed out a problem with the previous definition, v3, of +// ambiguous states. If we have another state associated with conflicting +// alternatives, we should keep going. For example, the following grammar +// +// s : (ID | ID ID?) '' +// +// When the ATN simulation reaches the state before '', it has a DFA +// state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally +// 12|1|[] and 12|2|[] conflict, but we cannot stop processing p node +// because alternative to has another way to continue, via [6|2|[]]. +// The key is that we have a single state that has config's only associated +// with a single alternative, 2, and crucially the state transitions +// among the configurations are all non-epsilon transitions. That means +// we don't consider any conflicts that include alternative 2. So, we +// ignore the conflict between alts 1 and 2. We ignore a set of +// conflicting alts when there is an intersection with an alternative +// associated with a single alt state in the state&rarrconfig-list map. +// +// It's also the case that we might have two conflicting configurations but +// also a 3rd nonconflicting configuration for a different alternative: +// [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar: +// +// a : A | A | A B +// +// After Matching input A, we reach the stop state for rule A, state 1. +// State 8 is the state right before B. Clearly alternatives 1 and 2 +// conflict and no amount of further lookahead will separate the two. +// However, alternative 3 will be able to continue and so we do not +// stop working on p state. In the previous example, we're concerned +// with states associated with the conflicting alternatives. Here alt +// 3 is not associated with the conflicting configs, but since we can continue +// looking for input reasonably, I don't declare the state done. We +// ignore a set of conflicting alts when we have an alternative +// that we still need to pursue. +// + +func (p *ParserATNSimulator) getConflictingAltsOrUniqueAlt(configs ATNConfigSet) *BitSet { + var conflictingAlts *BitSet + if configs.GetUniqueAlt() != ATNInvalidAltNumber { + conflictingAlts = NewBitSet() + conflictingAlts.add(configs.GetUniqueAlt()) + } else { + conflictingAlts = configs.GetConflictingAlts() + } + return conflictingAlts +} + +func (p *ParserATNSimulator) GetTokenName(t int) string { + if t == TokenEOF { + return "EOF" + } + + if p.parser != nil && p.parser.GetLiteralNames() != nil { + if t >= len(p.parser.GetLiteralNames()) { + fmt.Println(strconv.Itoa(t) + " ttype out of range: " + strings.Join(p.parser.GetLiteralNames(), ",")) + // fmt.Println(p.parser.GetInputStream().(TokenStream).GetAllText()) // p seems incorrect + } else { + return p.parser.GetLiteralNames()[t] + "<" + strconv.Itoa(t) + ">" + } + } + + return strconv.Itoa(t) +} + +func (p *ParserATNSimulator) getLookaheadName(input TokenStream) string { + return p.GetTokenName(input.LA(1)) +} + +// Used for debugging in AdaptivePredict around execATN but I cut +// +// it out for clarity now that alg. works well. We can leave p +// "dead" code for a bit. +func (p *ParserATNSimulator) dumpDeadEndConfigs(nvae *NoViableAltException) { + + panic("Not implemented") + + // fmt.Println("dead end configs: ") + // var decs = nvae.deadEndConfigs + // + // for i:=0; i0) { + // var t = c.state.GetTransitions()[0] + // if t2, ok := t.(*AtomTransition); ok { + // trans = "Atom "+ p.GetTokenName(t2.label) + // } else if t3, ok := t.(SetTransition); ok { + // _, ok := t.(*NotSetTransition) + // + // var s string + // if (ok){ + // s = "~" + // } + // + // trans = s + "Set " + t3.set + // } + // } + // fmt.Errorf(c.String(p.parser, true) + ":" + trans) + // } +} + +func (p *ParserATNSimulator) noViableAlt(input TokenStream, outerContext ParserRuleContext, configs ATNConfigSet, startIndex int) *NoViableAltException { + return NewNoViableAltException(p.parser, input, input.Get(startIndex), input.LT(1), configs, outerContext) +} + +func (p *ParserATNSimulator) getUniqueAlt(configs ATNConfigSet) int { + alt := ATNInvalidAltNumber + for _, c := range configs.GetItems() { + if alt == ATNInvalidAltNumber { + alt = c.GetAlt() // found first alt + } else if c.GetAlt() != alt { + return ATNInvalidAltNumber + } + } + return alt +} + +// Add an edge to the DFA, if possible. This method calls +// {@link //addDFAState} to ensure the {@code to} state is present in the +// DFA. If {@code from} is {@code nil}, or if {@code t} is outside the +// range of edges that can be represented in the DFA tables, p method +// returns without adding the edge to the DFA. +// +//

If {@code to} is {@code nil}, p method returns {@code nil}. +// Otherwise, p method returns the {@link DFAState} returned by calling +// {@link //addDFAState} for the {@code to} state.

+// +// @param dfa The DFA +// @param from The source state for the edge +// @param t The input symbol +// @param to The target state for the edge +// +// @return If {@code to} is {@code nil}, p method returns {@code nil} +// otherwise p method returns the result of calling {@link //addDFAState} +// on {@code to} +func (p *ParserATNSimulator) addDFAEdge(dfa *DFA, from *DFAState, t int, to *DFAState) *DFAState { + if ParserATNSimulatorDebug { + fmt.Println("EDGE " + from.String() + " -> " + to.String() + " upon " + p.GetTokenName(t)) + } + if to == nil { + return nil + } + p.atn.stateMu.Lock() + to = p.addDFAState(dfa, to) // used existing if possible not incoming + p.atn.stateMu.Unlock() + if from == nil || t < -1 || t > p.atn.maxTokenType { + return to + } + p.atn.edgeMu.Lock() + if from.getEdges() == nil { + from.setEdges(make([]*DFAState, p.atn.maxTokenType+1+1)) + } + from.setIthEdge(t+1, to) // connect + p.atn.edgeMu.Unlock() + + if ParserATNSimulatorDebug { + var names []string + if p.parser != nil { + names = p.parser.GetLiteralNames() + } + + fmt.Println("DFA=\n" + dfa.String(names, nil)) + } + return to +} + +// Add state {@code D} to the DFA if it is not already present, and return +// the actual instance stored in the DFA. If a state equivalent to {@code D} +// is already in the DFA, the existing state is returned. Otherwise p +// method returns {@code D} after adding it to the DFA. +// +//

If {@code D} is {@link //ERROR}, p method returns {@link //ERROR} and +// does not change the DFA.

+// +// @param dfa The dfa +// @param D The DFA state to add +// @return The state stored in the DFA. This will be either the existing +// state if {@code D} is already in the DFA, or {@code D} itself if the +// state was not already present. +func (p *ParserATNSimulator) addDFAState(dfa *DFA, d *DFAState) *DFAState { + if d == ATNSimulatorError { + return d + } + existing, present := dfa.states.Get(d) + if present { + return existing + } + + // The state was not present, so update it with configs + // + d.stateNumber = dfa.states.Len() + if !d.configs.ReadOnly() { + d.configs.OptimizeConfigs(p.BaseATNSimulator) + d.configs.SetReadOnly(true) + } + dfa.states.Put(d) + if ParserATNSimulatorDebug { + fmt.Println("adding NewDFA state: " + d.String()) + } + + return d +} + +func (p *ParserATNSimulator) ReportAttemptingFullContext(dfa *DFA, conflictingAlts *BitSet, configs ATNConfigSet, startIndex, stopIndex int) { + if ParserATNSimulatorDebug || ParserATNSimulatorRetryDebug { + interval := NewInterval(startIndex, stopIndex+1) + fmt.Println("ReportAttemptingFullContext decision=" + strconv.Itoa(dfa.decision) + ":" + configs.String() + + ", input=" + p.parser.GetTokenStream().GetTextFromInterval(interval)) + } + if p.parser != nil { + p.parser.GetErrorListenerDispatch().ReportAttemptingFullContext(p.parser, dfa, startIndex, stopIndex, conflictingAlts, configs) + } +} + +func (p *ParserATNSimulator) ReportContextSensitivity(dfa *DFA, prediction int, configs ATNConfigSet, startIndex, stopIndex int) { + if ParserATNSimulatorDebug || ParserATNSimulatorRetryDebug { + interval := NewInterval(startIndex, stopIndex+1) + fmt.Println("ReportContextSensitivity decision=" + strconv.Itoa(dfa.decision) + ":" + configs.String() + + ", input=" + p.parser.GetTokenStream().GetTextFromInterval(interval)) + } + if p.parser != nil { + p.parser.GetErrorListenerDispatch().ReportContextSensitivity(p.parser, dfa, startIndex, stopIndex, prediction, configs) + } +} + +// If context sensitive parsing, we know it's ambiguity not conflict// +func (p *ParserATNSimulator) ReportAmbiguity(dfa *DFA, D *DFAState, startIndex, stopIndex int, + exact bool, ambigAlts *BitSet, configs ATNConfigSet) { + if ParserATNSimulatorDebug || ParserATNSimulatorRetryDebug { + interval := NewInterval(startIndex, stopIndex+1) + fmt.Println("ReportAmbiguity " + ambigAlts.String() + ":" + configs.String() + + ", input=" + p.parser.GetTokenStream().GetTextFromInterval(interval)) + } + if p.parser != nil { + p.parser.GetErrorListenerDispatch().ReportAmbiguity(p.parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs) + } +} diff --git a/runtime/Go/antlr/v4/parser_rule_context.go b/runtime/Go/antlr/v4/parser_rule_context.go new file mode 100644 index 0000000000..1c8cee7479 --- /dev/null +++ b/runtime/Go/antlr/v4/parser_rule_context.go @@ -0,0 +1,362 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "reflect" + "strconv" +) + +type ParserRuleContext interface { + RuleContext + + SetException(RecognitionException) + + AddTokenNode(token Token) *TerminalNodeImpl + AddErrorNode(badToken Token) *ErrorNodeImpl + + EnterRule(listener ParseTreeListener) + ExitRule(listener ParseTreeListener) + + SetStart(Token) + GetStart() Token + + SetStop(Token) + GetStop() Token + + AddChild(child RuleContext) RuleContext + RemoveLastChild() +} + +type BaseParserRuleContext struct { + *BaseRuleContext + + start, stop Token + exception RecognitionException + children []Tree +} + +func NewBaseParserRuleContext(parent ParserRuleContext, invokingStateNumber int) *BaseParserRuleContext { + prc := new(BaseParserRuleContext) + + prc.BaseRuleContext = NewBaseRuleContext(parent, invokingStateNumber) + + prc.RuleIndex = -1 + // * If we are debugging or building a parse tree for a Visitor, + // we need to track all of the tokens and rule invocations associated + // with prc rule's context. This is empty for parsing w/o tree constr. + // operation because we don't the need to track the details about + // how we parse prc rule. + // / + prc.children = nil + prc.start = nil + prc.stop = nil + // The exception that forced prc rule to return. If the rule successfully + // completed, prc is {@code nil}. + prc.exception = nil + + return prc +} + +func (prc *BaseParserRuleContext) SetException(e RecognitionException) { + prc.exception = e +} + +func (prc *BaseParserRuleContext) GetChildren() []Tree { + return prc.children +} + +func (prc *BaseParserRuleContext) CopyFrom(ctx *BaseParserRuleContext) { + // from RuleContext + prc.parentCtx = ctx.parentCtx + prc.invokingState = ctx.invokingState + prc.children = nil + prc.start = ctx.start + prc.stop = ctx.stop +} + +func (prc *BaseParserRuleContext) GetText() string { + if prc.GetChildCount() == 0 { + return "" + } + + var s string + for _, child := range prc.children { + s += child.(ParseTree).GetText() + } + + return s +} + +// Double dispatch methods for listeners +func (prc *BaseParserRuleContext) EnterRule(listener ParseTreeListener) { +} + +func (prc *BaseParserRuleContext) ExitRule(listener ParseTreeListener) { +} + +// * Does not set parent link other add methods do that/// +func (prc *BaseParserRuleContext) addTerminalNodeChild(child TerminalNode) TerminalNode { + if prc.children == nil { + prc.children = make([]Tree, 0) + } + if child == nil { + panic("Child may not be null") + } + prc.children = append(prc.children, child) + return child +} + +func (prc *BaseParserRuleContext) AddChild(child RuleContext) RuleContext { + if prc.children == nil { + prc.children = make([]Tree, 0) + } + if child == nil { + panic("Child may not be null") + } + prc.children = append(prc.children, child) + return child +} + +// * Used by EnterOuterAlt to toss out a RuleContext previously added as +// we entered a rule. If we have // label, we will need to remove +// generic ruleContext object. +// / +func (prc *BaseParserRuleContext) RemoveLastChild() { + if prc.children != nil && len(prc.children) > 0 { + prc.children = prc.children[0 : len(prc.children)-1] + } +} + +func (prc *BaseParserRuleContext) AddTokenNode(token Token) *TerminalNodeImpl { + + node := NewTerminalNodeImpl(token) + prc.addTerminalNodeChild(node) + node.parentCtx = prc + return node + +} + +func (prc *BaseParserRuleContext) AddErrorNode(badToken Token) *ErrorNodeImpl { + node := NewErrorNodeImpl(badToken) + prc.addTerminalNodeChild(node) + node.parentCtx = prc + return node +} + +func (prc *BaseParserRuleContext) GetChild(i int) Tree { + if prc.children != nil && len(prc.children) >= i { + return prc.children[i] + } + + return nil +} + +func (prc *BaseParserRuleContext) GetChildOfType(i int, childType reflect.Type) RuleContext { + if childType == nil { + return prc.GetChild(i).(RuleContext) + } + + for j := 0; j < len(prc.children); j++ { + child := prc.children[j] + if reflect.TypeOf(child) == childType { + if i == 0 { + return child.(RuleContext) + } + + i-- + } + } + + return nil +} + +func (prc *BaseParserRuleContext) ToStringTree(ruleNames []string, recog Recognizer) string { + return TreesStringTree(prc, ruleNames, recog) +} + +func (prc *BaseParserRuleContext) GetRuleContext() RuleContext { + return prc +} + +func (prc *BaseParserRuleContext) Accept(visitor ParseTreeVisitor) interface{} { + return visitor.VisitChildren(prc) +} + +func (prc *BaseParserRuleContext) SetStart(t Token) { + prc.start = t +} + +func (prc *BaseParserRuleContext) GetStart() Token { + return prc.start +} + +func (prc *BaseParserRuleContext) SetStop(t Token) { + prc.stop = t +} + +func (prc *BaseParserRuleContext) GetStop() Token { + return prc.stop +} + +func (prc *BaseParserRuleContext) GetToken(ttype int, i int) TerminalNode { + + for j := 0; j < len(prc.children); j++ { + child := prc.children[j] + if c2, ok := child.(TerminalNode); ok { + if c2.GetSymbol().GetTokenType() == ttype { + if i == 0 { + return c2 + } + + i-- + } + } + } + return nil +} + +func (prc *BaseParserRuleContext) GetTokens(ttype int) []TerminalNode { + if prc.children == nil { + return make([]TerminalNode, 0) + } + + tokens := make([]TerminalNode, 0) + + for j := 0; j < len(prc.children); j++ { + child := prc.children[j] + if tchild, ok := child.(TerminalNode); ok { + if tchild.GetSymbol().GetTokenType() == ttype { + tokens = append(tokens, tchild) + } + } + } + + return tokens +} + +func (prc *BaseParserRuleContext) GetPayload() interface{} { + return prc +} + +func (prc *BaseParserRuleContext) getChild(ctxType reflect.Type, i int) RuleContext { + if prc.children == nil || i < 0 || i >= len(prc.children) { + return nil + } + + j := -1 // what element have we found with ctxType? + for _, o := range prc.children { + + childType := reflect.TypeOf(o) + + if childType.Implements(ctxType) { + j++ + if j == i { + return o.(RuleContext) + } + } + } + return nil +} + +// Go lacks generics, so it's not possible for us to return the child with the correct type, but we do +// check for convertibility + +func (prc *BaseParserRuleContext) GetTypedRuleContext(ctxType reflect.Type, i int) RuleContext { + return prc.getChild(ctxType, i) +} + +func (prc *BaseParserRuleContext) GetTypedRuleContexts(ctxType reflect.Type) []RuleContext { + if prc.children == nil { + return make([]RuleContext, 0) + } + + contexts := make([]RuleContext, 0) + + for _, child := range prc.children { + childType := reflect.TypeOf(child) + + if childType.ConvertibleTo(ctxType) { + contexts = append(contexts, child.(RuleContext)) + } + } + return contexts +} + +func (prc *BaseParserRuleContext) GetChildCount() int { + if prc.children == nil { + return 0 + } + + return len(prc.children) +} + +func (prc *BaseParserRuleContext) GetSourceInterval() *Interval { + if prc.start == nil || prc.stop == nil { + return TreeInvalidInterval + } + + return NewInterval(prc.start.GetTokenIndex(), prc.stop.GetTokenIndex()) +} + +//need to manage circular dependencies, so export now + +// Print out a whole tree, not just a node, in LISP format +// (root child1 .. childN). Print just a node if b is a leaf. +// + +func (prc *BaseParserRuleContext) String(ruleNames []string, stop RuleContext) string { + + var p ParserRuleContext = prc + s := "[" + for p != nil && p != stop { + if ruleNames == nil { + if !p.IsEmpty() { + s += strconv.Itoa(p.GetInvokingState()) + } + } else { + ri := p.GetRuleIndex() + var ruleName string + if ri >= 0 && ri < len(ruleNames) { + ruleName = ruleNames[ri] + } else { + ruleName = strconv.Itoa(ri) + } + s += ruleName + } + if p.GetParent() != nil && (ruleNames != nil || !p.GetParent().(ParserRuleContext).IsEmpty()) { + s += " " + } + pi := p.GetParent() + if pi != nil { + p = pi.(ParserRuleContext) + } else { + p = nil + } + } + s += "]" + return s +} + +var ParserRuleContextEmpty = NewBaseParserRuleContext(nil, -1) + +type InterpreterRuleContext interface { + ParserRuleContext +} + +type BaseInterpreterRuleContext struct { + *BaseParserRuleContext +} + +func NewBaseInterpreterRuleContext(parent BaseInterpreterRuleContext, invokingStateNumber, ruleIndex int) *BaseInterpreterRuleContext { + + prc := new(BaseInterpreterRuleContext) + + prc.BaseParserRuleContext = NewBaseParserRuleContext(parent, invokingStateNumber) + + prc.RuleIndex = ruleIndex + + return prc +} diff --git a/runtime/Go/antlr/v4/prediction_context.go b/runtime/Go/antlr/v4/prediction_context.go new file mode 100644 index 0000000000..72d24c3260 --- /dev/null +++ b/runtime/Go/antlr/v4/prediction_context.go @@ -0,0 +1,764 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "golang.org/x/exp/slices" + "strconv" +) + +// Represents {@code $} in local context prediction, which means wildcard. +// {@code//+x =//}. +// / +const ( + BasePredictionContextEmptyReturnState = 0x7FFFFFFF +) + +// Represents {@code $} in an array in full context mode, when {@code $} +// doesn't mean wildcard: {@code $ + x = [$,x]}. Here, +// {@code $} = {@link //EmptyReturnState}. +// / + +var ( + BasePredictionContextglobalNodeCount = 1 + BasePredictionContextid = BasePredictionContextglobalNodeCount +) + +type PredictionContext interface { + Hash() int + Equals(interface{}) bool + GetParent(int) PredictionContext + getReturnState(int) int + length() int + isEmpty() bool + hasEmptyPath() bool + String() string +} + +type BasePredictionContext struct { + cachedHash int +} + +func NewBasePredictionContext(cachedHash int) *BasePredictionContext { + pc := new(BasePredictionContext) + pc.cachedHash = cachedHash + + return pc +} + +func (b *BasePredictionContext) isEmpty() bool { + return false +} + +func calculateHash(parent PredictionContext, returnState int) int { + h := murmurInit(1) + h = murmurUpdate(h, parent.Hash()) + h = murmurUpdate(h, returnState) + return murmurFinish(h, 2) +} + +var _emptyPredictionContextHash int + +func init() { + _emptyPredictionContextHash = murmurInit(1) + _emptyPredictionContextHash = murmurFinish(_emptyPredictionContextHash, 0) +} + +func calculateEmptyHash() int { + return _emptyPredictionContextHash +} + +// Used to cache {@link BasePredictionContext} objects. Its used for the shared +// context cash associated with contexts in DFA states. This cache +// can be used for both lexers and parsers. + +type PredictionContextCache struct { + cache map[PredictionContext]PredictionContext +} + +func NewPredictionContextCache() *PredictionContextCache { + t := new(PredictionContextCache) + t.cache = make(map[PredictionContext]PredictionContext) + return t +} + +// Add a context to the cache and return it. If the context already exists, +// return that one instead and do not add a Newcontext to the cache. +// Protect shared cache from unsafe thread access. +func (p *PredictionContextCache) add(ctx PredictionContext) PredictionContext { + if ctx == BasePredictionContextEMPTY { + return BasePredictionContextEMPTY + } + existing := p.cache[ctx] + if existing != nil { + return existing + } + p.cache[ctx] = ctx + return ctx +} + +func (p *PredictionContextCache) Get(ctx PredictionContext) PredictionContext { + return p.cache[ctx] +} + +func (p *PredictionContextCache) length() int { + return len(p.cache) +} + +type SingletonPredictionContext interface { + PredictionContext +} + +type BaseSingletonPredictionContext struct { + *BasePredictionContext + + parentCtx PredictionContext + returnState int +} + +func NewBaseSingletonPredictionContext(parent PredictionContext, returnState int) *BaseSingletonPredictionContext { + var cachedHash int + if parent != nil { + cachedHash = calculateHash(parent, returnState) + } else { + cachedHash = calculateEmptyHash() + } + + s := new(BaseSingletonPredictionContext) + s.BasePredictionContext = NewBasePredictionContext(cachedHash) + + s.parentCtx = parent + s.returnState = returnState + + return s +} + +func SingletonBasePredictionContextCreate(parent PredictionContext, returnState int) PredictionContext { + if returnState == BasePredictionContextEmptyReturnState && parent == nil { + // someone can pass in the bits of an array ctx that mean $ + return BasePredictionContextEMPTY + } + + return NewBaseSingletonPredictionContext(parent, returnState) +} + +func (b *BaseSingletonPredictionContext) length() int { + return 1 +} + +func (b *BaseSingletonPredictionContext) GetParent(index int) PredictionContext { + return b.parentCtx +} + +func (b *BaseSingletonPredictionContext) getReturnState(index int) int { + return b.returnState +} + +func (b *BaseSingletonPredictionContext) hasEmptyPath() bool { + return b.returnState == BasePredictionContextEmptyReturnState +} + +func (b *BaseSingletonPredictionContext) Hash() int { + return b.cachedHash +} + +func (b *BaseSingletonPredictionContext) Equals(other interface{}) bool { + if b == other { + return true + } + if _, ok := other.(*BaseSingletonPredictionContext); !ok { + return false + } + + otherP := other.(*BaseSingletonPredictionContext) + + if b.returnState != otherP.getReturnState(0) { + return false + } + if b.parentCtx == nil { + return otherP.parentCtx == nil + } + + return b.parentCtx.Equals(otherP.parentCtx) +} + +func (b *BaseSingletonPredictionContext) String() string { + var up string + + if b.parentCtx == nil { + up = "" + } else { + up = b.parentCtx.String() + } + + if len(up) == 0 { + if b.returnState == BasePredictionContextEmptyReturnState { + return "$" + } + + return strconv.Itoa(b.returnState) + } + + return strconv.Itoa(b.returnState) + " " + up +} + +var BasePredictionContextEMPTY = NewEmptyPredictionContext() + +type EmptyPredictionContext struct { + *BaseSingletonPredictionContext +} + +func NewEmptyPredictionContext() *EmptyPredictionContext { + + p := new(EmptyPredictionContext) + + p.BaseSingletonPredictionContext = NewBaseSingletonPredictionContext(nil, BasePredictionContextEmptyReturnState) + p.cachedHash = calculateEmptyHash() + return p +} + +func (e *EmptyPredictionContext) isEmpty() bool { + return true +} + +func (e *EmptyPredictionContext) GetParent(index int) PredictionContext { + return nil +} + +func (e *EmptyPredictionContext) getReturnState(index int) int { + return e.returnState +} + +func (e *EmptyPredictionContext) Hash() int { + return e.cachedHash +} + +func (e *EmptyPredictionContext) Equals(other interface{}) bool { + return e == other +} + +func (e *EmptyPredictionContext) String() string { + return "$" +} + +type ArrayPredictionContext struct { + *BasePredictionContext + + parents []PredictionContext + returnStates []int +} + +func NewArrayPredictionContext(parents []PredictionContext, returnStates []int) *ArrayPredictionContext { + // Parent can be nil only if full ctx mode and we make an array + // from {@link //EMPTY} and non-empty. We merge {@link //EMPTY} by using + // nil parent and + // returnState == {@link //EmptyReturnState}. + hash := murmurInit(1) + + for _, parent := range parents { + hash = murmurUpdate(hash, parent.Hash()) + } + + for _, returnState := range returnStates { + hash = murmurUpdate(hash, returnState) + } + + hash = murmurFinish(hash, len(parents)<<1) + + c := new(ArrayPredictionContext) + c.BasePredictionContext = NewBasePredictionContext(hash) + + c.parents = parents + c.returnStates = returnStates + + return c +} + +func (a *ArrayPredictionContext) GetReturnStates() []int { + return a.returnStates +} + +func (a *ArrayPredictionContext) hasEmptyPath() bool { + return a.getReturnState(a.length()-1) == BasePredictionContextEmptyReturnState +} + +func (a *ArrayPredictionContext) isEmpty() bool { + // since EmptyReturnState can only appear in the last position, we + // don't need to verify that size==1 + return a.returnStates[0] == BasePredictionContextEmptyReturnState +} + +func (a *ArrayPredictionContext) length() int { + return len(a.returnStates) +} + +func (a *ArrayPredictionContext) GetParent(index int) PredictionContext { + return a.parents[index] +} + +func (a *ArrayPredictionContext) getReturnState(index int) int { + return a.returnStates[index] +} + +// Equals is the default comparison function for ArrayPredictionContext when no specialized +// implementation is needed for a collection +func (a *ArrayPredictionContext) Equals(o interface{}) bool { + if a == o { + return true + } + other, ok := o.(*ArrayPredictionContext) + if !ok { + return false + } + if a.cachedHash != other.Hash() { + return false // can't be same if hash is different + } + + // Must compare the actual array elements and not just the array address + // + return slices.Equal(a.returnStates, other.returnStates) && + slices.EqualFunc(a.parents, other.parents, func(x, y PredictionContext) bool { + return x.Equals(y) + }) +} + +// Hash is the default hash function for ArrayPredictionContext when no specialized +// implementation is needed for a collection +func (a *ArrayPredictionContext) Hash() int { + return a.BasePredictionContext.cachedHash +} + +func (a *ArrayPredictionContext) String() string { + if a.isEmpty() { + return "[]" + } + + s := "[" + for i := 0; i < len(a.returnStates); i++ { + if i > 0 { + s = s + ", " + } + if a.returnStates[i] == BasePredictionContextEmptyReturnState { + s = s + "$" + continue + } + s = s + strconv.Itoa(a.returnStates[i]) + if a.parents[i] != nil { + s = s + " " + a.parents[i].String() + } else { + s = s + "nil" + } + } + + return s + "]" +} + +// Convert a {@link RuleContext} tree to a {@link BasePredictionContext} graph. +// Return {@link //EMPTY} if {@code outerContext} is empty or nil. +// / +func predictionContextFromRuleContext(a *ATN, outerContext RuleContext) PredictionContext { + if outerContext == nil { + outerContext = ParserRuleContextEmpty + } + // if we are in RuleContext of start rule, s, then BasePredictionContext + // is EMPTY. Nobody called us. (if we are empty, return empty) + if outerContext.GetParent() == nil || outerContext == ParserRuleContextEmpty { + return BasePredictionContextEMPTY + } + // If we have a parent, convert it to a BasePredictionContext graph + parent := predictionContextFromRuleContext(a, outerContext.GetParent().(RuleContext)) + state := a.states[outerContext.GetInvokingState()] + transition := state.GetTransitions()[0] + + return SingletonBasePredictionContextCreate(parent, transition.(*RuleTransition).followState.GetStateNumber()) +} + +func merge(a, b PredictionContext, rootIsWildcard bool, mergeCache *DoubleDict) PredictionContext { + // share same graph if both same + if a == b { + return a + } + + ac, ok1 := a.(*BaseSingletonPredictionContext) + bc, ok2 := b.(*BaseSingletonPredictionContext) + + if ok1 && ok2 { + return mergeSingletons(ac, bc, rootIsWildcard, mergeCache) + } + // At least one of a or b is array + // If one is $ and rootIsWildcard, return $ as// wildcard + if rootIsWildcard { + if _, ok := a.(*EmptyPredictionContext); ok { + return a + } + if _, ok := b.(*EmptyPredictionContext); ok { + return b + } + } + // convert singleton so both are arrays to normalize + if _, ok := a.(*BaseSingletonPredictionContext); ok { + a = NewArrayPredictionContext([]PredictionContext{a.GetParent(0)}, []int{a.getReturnState(0)}) + } + if _, ok := b.(*BaseSingletonPredictionContext); ok { + b = NewArrayPredictionContext([]PredictionContext{b.GetParent(0)}, []int{b.getReturnState(0)}) + } + return mergeArrays(a.(*ArrayPredictionContext), b.(*ArrayPredictionContext), rootIsWildcard, mergeCache) +} + +// Merge two {@link SingletonBasePredictionContext} instances. +// +//

Stack tops equal, parents merge is same return left graph.
+//

+// +//

Same stack top, parents differ merge parents giving array node, then +// remainders of those graphs. A Newroot node is created to point to the +// merged parents.
+//

+// +//

Different stack tops pointing to same parent. Make array node for the +// root where both element in the root point to the same (original) +// parent.
+//

+// +//

Different stack tops pointing to different parents. Make array node for +// the root where each element points to the corresponding original +// parent.
+//

+// +// @param a the first {@link SingletonBasePredictionContext} +// @param b the second {@link SingletonBasePredictionContext} +// @param rootIsWildcard {@code true} if this is a local-context merge, +// otherwise false to indicate a full-context merge +// @param mergeCache +// / +func mergeSingletons(a, b *BaseSingletonPredictionContext, rootIsWildcard bool, mergeCache *DoubleDict) PredictionContext { + if mergeCache != nil { + previous := mergeCache.Get(a.Hash(), b.Hash()) + if previous != nil { + return previous.(PredictionContext) + } + previous = mergeCache.Get(b.Hash(), a.Hash()) + if previous != nil { + return previous.(PredictionContext) + } + } + + rootMerge := mergeRoot(a, b, rootIsWildcard) + if rootMerge != nil { + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), rootMerge) + } + return rootMerge + } + if a.returnState == b.returnState { + parent := merge(a.parentCtx, b.parentCtx, rootIsWildcard, mergeCache) + // if parent is same as existing a or b parent or reduced to a parent, + // return it + if parent == a.parentCtx { + return a // ax + bx = ax, if a=b + } + if parent == b.parentCtx { + return b // ax + bx = bx, if a=b + } + // else: ax + ay = a'[x,y] + // merge parents x and y, giving array node with x,y then remainders + // of those graphs. dup a, a' points at merged array + // Newjoined parent so create Newsingleton pointing to it, a' + spc := SingletonBasePredictionContextCreate(parent, a.returnState) + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), spc) + } + return spc + } + // a != b payloads differ + // see if we can collapse parents due to $+x parents if local ctx + var singleParent PredictionContext + if a == b || (a.parentCtx != nil && a.parentCtx == b.parentCtx) { // ax + + // bx = + // [a,b]x + singleParent = a.parentCtx + } + if singleParent != nil { // parents are same + // sort payloads and use same parent + payloads := []int{a.returnState, b.returnState} + if a.returnState > b.returnState { + payloads[0] = b.returnState + payloads[1] = a.returnState + } + parents := []PredictionContext{singleParent, singleParent} + apc := NewArrayPredictionContext(parents, payloads) + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), apc) + } + return apc + } + // parents differ and can't merge them. Just pack together + // into array can't merge. + // ax + by = [ax,by] + payloads := []int{a.returnState, b.returnState} + parents := []PredictionContext{a.parentCtx, b.parentCtx} + if a.returnState > b.returnState { // sort by payload + payloads[0] = b.returnState + payloads[1] = a.returnState + parents = []PredictionContext{b.parentCtx, a.parentCtx} + } + apc := NewArrayPredictionContext(parents, payloads) + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), apc) + } + return apc +} + +// Handle case where at least one of {@code a} or {@code b} is +// {@link //EMPTY}. In the following diagrams, the symbol {@code $} is used +// to represent {@link //EMPTY}. +// +//

Local-Context Merges

+// +//

These local-context merge operations are used when {@code rootIsWildcard} +// is true.

+// +//

{@link //EMPTY} is superset of any graph return {@link //EMPTY}.
+//

+// +//

{@link //EMPTY} and anything is {@code //EMPTY}, so merged parent is +// {@code //EMPTY} return left graph.
+//

+// +//

Special case of last merge if local context.
+//

+// +//

Full-Context Merges

+// +//

These full-context merge operations are used when {@code rootIsWildcard} +// is false.

+// +//

+// +//

Must keep all contexts {@link //EMPTY} in array is a special value (and +// nil parent).
+//

+// +//

+// +// @param a the first {@link SingletonBasePredictionContext} +// @param b the second {@link SingletonBasePredictionContext} +// @param rootIsWildcard {@code true} if this is a local-context merge, +// otherwise false to indicate a full-context merge +// / +func mergeRoot(a, b SingletonPredictionContext, rootIsWildcard bool) PredictionContext { + if rootIsWildcard { + if a == BasePredictionContextEMPTY { + return BasePredictionContextEMPTY // // + b =// + } + if b == BasePredictionContextEMPTY { + return BasePredictionContextEMPTY // a +// =// + } + } else { + if a == BasePredictionContextEMPTY && b == BasePredictionContextEMPTY { + return BasePredictionContextEMPTY // $ + $ = $ + } else if a == BasePredictionContextEMPTY { // $ + x = [$,x] + payloads := []int{b.getReturnState(-1), BasePredictionContextEmptyReturnState} + parents := []PredictionContext{b.GetParent(-1), nil} + return NewArrayPredictionContext(parents, payloads) + } else if b == BasePredictionContextEMPTY { // x + $ = [$,x] ($ is always first if present) + payloads := []int{a.getReturnState(-1), BasePredictionContextEmptyReturnState} + parents := []PredictionContext{a.GetParent(-1), nil} + return NewArrayPredictionContext(parents, payloads) + } + } + return nil +} + +// Merge two {@link ArrayBasePredictionContext} instances. +// +//

Different tops, different parents.
+//

+// +//

Shared top, same parents.
+//

+// +//

Shared top, different parents.
+//

+// +//

Shared top, all shared parents.
+//

+// +//

Equal tops, merge parents and reduce top to +// {@link SingletonBasePredictionContext}.
+//

+// / +func mergeArrays(a, b *ArrayPredictionContext, rootIsWildcard bool, mergeCache *DoubleDict) PredictionContext { + if mergeCache != nil { + previous := mergeCache.Get(a.Hash(), b.Hash()) + if previous != nil { + return previous.(PredictionContext) + } + previous = mergeCache.Get(b.Hash(), a.Hash()) + if previous != nil { + return previous.(PredictionContext) + } + } + // merge sorted payloads a + b => M + i := 0 // walks a + j := 0 // walks b + k := 0 // walks target M array + + mergedReturnStates := make([]int, len(a.returnStates)+len(b.returnStates)) + mergedParents := make([]PredictionContext, len(a.returnStates)+len(b.returnStates)) + // walk and merge to yield mergedParents, mergedReturnStates + for i < len(a.returnStates) && j < len(b.returnStates) { + aParent := a.parents[i] + bParent := b.parents[j] + if a.returnStates[i] == b.returnStates[j] { + // same payload (stack tops are equal), must yield merged singleton + payload := a.returnStates[i] + // $+$ = $ + bothDollars := payload == BasePredictionContextEmptyReturnState && aParent == nil && bParent == nil + axAX := aParent != nil && bParent != nil && aParent == bParent // ax+ax + // -> + // ax + if bothDollars || axAX { + mergedParents[k] = aParent // choose left + mergedReturnStates[k] = payload + } else { // ax+ay -> a'[x,y] + mergedParent := merge(aParent, bParent, rootIsWildcard, mergeCache) + mergedParents[k] = mergedParent + mergedReturnStates[k] = payload + } + i++ // hop over left one as usual + j++ // but also Skip one in right side since we merge + } else if a.returnStates[i] < b.returnStates[j] { // copy a[i] to M + mergedParents[k] = aParent + mergedReturnStates[k] = a.returnStates[i] + i++ + } else { // b > a, copy b[j] to M + mergedParents[k] = bParent + mergedReturnStates[k] = b.returnStates[j] + j++ + } + k++ + } + // copy over any payloads remaining in either array + if i < len(a.returnStates) { + for p := i; p < len(a.returnStates); p++ { + mergedParents[k] = a.parents[p] + mergedReturnStates[k] = a.returnStates[p] + k++ + } + } else { + for p := j; p < len(b.returnStates); p++ { + mergedParents[k] = b.parents[p] + mergedReturnStates[k] = b.returnStates[p] + k++ + } + } + // trim merged if we combined a few that had same stack tops + if k < len(mergedParents) { // write index < last position trim + if k == 1 { // for just one merged element, return singleton top + pc := SingletonBasePredictionContextCreate(mergedParents[0], mergedReturnStates[0]) + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), pc) + } + return pc + } + mergedParents = mergedParents[0:k] + mergedReturnStates = mergedReturnStates[0:k] + } + + M := NewArrayPredictionContext(mergedParents, mergedReturnStates) + + // if we created same array as a or b, return that instead + // TODO: track whether this is possible above during merge sort for speed + if M == a { + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), a) + } + return a + } + if M == b { + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), b) + } + return b + } + combineCommonParents(mergedParents) + + if mergeCache != nil { + mergeCache.set(a.Hash(), b.Hash(), M) + } + return M +} + +// Make pass over all M {@code parents} merge any {@code equals()} +// ones. +// / +func combineCommonParents(parents []PredictionContext) { + uniqueParents := make(map[PredictionContext]PredictionContext) + + for p := 0; p < len(parents); p++ { + parent := parents[p] + if uniqueParents[parent] == nil { + uniqueParents[parent] = parent + } + } + for q := 0; q < len(parents); q++ { + parents[q] = uniqueParents[parents[q]] + } +} + +func getCachedBasePredictionContext(context PredictionContext, contextCache *PredictionContextCache, visited map[PredictionContext]PredictionContext) PredictionContext { + + if context.isEmpty() { + return context + } + existing := visited[context] + if existing != nil { + return existing + } + existing = contextCache.Get(context) + if existing != nil { + visited[context] = existing + return existing + } + changed := false + parents := make([]PredictionContext, context.length()) + for i := 0; i < len(parents); i++ { + parent := getCachedBasePredictionContext(context.GetParent(i), contextCache, visited) + if changed || parent != context.GetParent(i) { + if !changed { + parents = make([]PredictionContext, context.length()) + for j := 0; j < context.length(); j++ { + parents[j] = context.GetParent(j) + } + changed = true + } + parents[i] = parent + } + } + if !changed { + contextCache.add(context) + visited[context] = context + return context + } + var updated PredictionContext + if len(parents) == 0 { + updated = BasePredictionContextEMPTY + } else if len(parents) == 1 { + updated = SingletonBasePredictionContextCreate(parents[0], context.getReturnState(0)) + } else { + updated = NewArrayPredictionContext(parents, context.(*ArrayPredictionContext).GetReturnStates()) + } + contextCache.add(updated) + visited[updated] = updated + visited[context] = updated + + return updated +} diff --git a/runtime/Go/antlr/v4/prediction_mode.go b/runtime/Go/antlr/v4/prediction_mode.go new file mode 100644 index 0000000000..270a89d393 --- /dev/null +++ b/runtime/Go/antlr/v4/prediction_mode.go @@ -0,0 +1,529 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +// This enumeration defines the prediction modes available in ANTLR 4 along with +// utility methods for analyzing configuration sets for conflicts and/or +// ambiguities. + +const ( + // + // The SLL(*) prediction mode. This prediction mode ignores the current + // parser context when making predictions. This is the fastest prediction + // mode, and provides correct results for many grammars. This prediction + // mode is more powerful than the prediction mode provided by ANTLR 3, but + // may result in syntax errors for grammar and input combinations which are + // not SLL. + // + //

+ // When using this prediction mode, the parser will either return a correct + // parse tree (i.e. the same parse tree that would be returned with the + // {@link //LL} prediction mode), or it will Report a syntax error. If a + // syntax error is encountered when using the {@link //SLL} prediction mode, + // it may be due to either an actual syntax error in the input or indicate + // that the particular combination of grammar and input requires the more + // powerful {@link //LL} prediction abilities to complete successfully.

+ // + //

+ // This prediction mode does not provide any guarantees for prediction + // behavior for syntactically-incorrect inputs.

+ // + PredictionModeSLL = 0 + // + // The LL(*) prediction mode. This prediction mode allows the current parser + // context to be used for resolving SLL conflicts that occur during + // prediction. This is the fastest prediction mode that guarantees correct + // parse results for all combinations of grammars with syntactically correct + // inputs. + // + //

+ // When using this prediction mode, the parser will make correct decisions + // for all syntactically-correct grammar and input combinations. However, in + // cases where the grammar is truly ambiguous this prediction mode might not + // Report a precise answer for exactly which alternatives are + // ambiguous.

+ // + //

+ // This prediction mode does not provide any guarantees for prediction + // behavior for syntactically-incorrect inputs.

+ // + PredictionModeLL = 1 + // + // The LL(*) prediction mode with exact ambiguity detection. In addition to + // the correctness guarantees provided by the {@link //LL} prediction mode, + // this prediction mode instructs the prediction algorithm to determine the + // complete and exact set of ambiguous alternatives for every ambiguous + // decision encountered while parsing. + // + //

+ // This prediction mode may be used for diagnosing ambiguities during + // grammar development. Due to the performance overhead of calculating sets + // of ambiguous alternatives, this prediction mode should be avoided when + // the exact results are not necessary.

+ // + //

+ // This prediction mode does not provide any guarantees for prediction + // behavior for syntactically-incorrect inputs.

+ // + PredictionModeLLExactAmbigDetection = 2 +) + +// Computes the SLL prediction termination condition. +// +//

+// This method computes the SLL prediction termination condition for both of +// the following cases.

+// +//
    +//
  • The usual SLL+LL fallback upon SLL conflict
  • +//
  • Pure SLL without LL fallback
  • +//
+// +//

COMBINED SLL+LL PARSING

+// +//

When LL-fallback is enabled upon SLL conflict, correct predictions are +// ensured regardless of how the termination condition is computed by this +// method. Due to the substantially higher cost of LL prediction, the +// prediction should only fall back to LL when the additional lookahead +// cannot lead to a unique SLL prediction.

+// +//

Assuming combined SLL+LL parsing, an SLL configuration set with only +// conflicting subsets should fall back to full LL, even if the +// configuration sets don't resolve to the same alternative (e.g. +// {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting +// configuration, SLL could continue with the hopes that more lookahead will +// resolve via one of those non-conflicting configurations.

+// +//

Here's the prediction termination rule them: SLL (for SLL+LL parsing) +// stops when it sees only conflicting configuration subsets. In contrast, +// full LL keeps going when there is uncertainty.

+// +//

HEURISTIC

+// +//

As a heuristic, we stop prediction when we see any conflicting subset +// unless we see a state that only has one alternative associated with it. +// The single-alt-state thing lets prediction continue upon rules like +// (otherwise, it would admit defeat too soon):

+// +//

{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ” }

+// +//

When the ATN simulation reaches the state before {@code ”}, it has a +// DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally +// {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop +// processing this node because alternative to has another way to continue, +// via {@code [6|2|[]]}.

+// +//

It also let's us continue for this rule:

+// +//

{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B }

+// +//

After Matching input A, we reach the stop state for rule A, state 1. +// State 8 is the state right before B. Clearly alternatives 1 and 2 +// conflict and no amount of further lookahead will separate the two. +// However, alternative 3 will be able to continue and so we do not stop +// working on this state. In the previous example, we're concerned with +// states associated with the conflicting alternatives. Here alt 3 is not +// associated with the conflicting configs, but since we can continue +// looking for input reasonably, don't declare the state done.

+// +//

PURE SLL PARSING

+// +//

To handle pure SLL parsing, all we have to do is make sure that we +// combine stack contexts for configurations that differ only by semantic +// predicate. From there, we can do the usual SLL termination heuristic.

+// +//

PREDICATES IN SLL+LL PARSING

+// +//

SLL decisions don't evaluate predicates until after they reach DFA stop +// states because they need to create the DFA cache that works in all +// semantic situations. In contrast, full LL evaluates predicates collected +// during start state computation so it can ignore predicates thereafter. +// This means that SLL termination detection can totally ignore semantic +// predicates.

+// +//

Implementation-wise, {@link ATNConfigSet} combines stack contexts but not +// semantic predicate contexts so we might see two configurations like the +// following.

+// +//

{@code (s, 1, x, {}), (s, 1, x', {p})}

+// +//

Before testing these configurations against others, we have to merge +// {@code x} and {@code x'} (without modifying the existing configurations). +// For example, we test {@code (x+x')==x”} when looking for conflicts in +// the following configurations.

+// +//

{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x”, {})}

+// +//

If the configuration set has predicates (as indicated by +// {@link ATNConfigSet//hasSemanticContext}), this algorithm makes a copy of +// the configurations to strip out all of the predicates so that a standard +// {@link ATNConfigSet} will merge everything ignoring predicates.

+func PredictionModehasSLLConflictTerminatingPrediction(mode int, configs ATNConfigSet) bool { + // Configs in rule stop states indicate reaching the end of the decision + // rule (local context) or end of start rule (full context). If all + // configs meet this condition, then none of the configurations is able + // to Match additional input so we terminate prediction. + // + if PredictionModeallConfigsInRuleStopStates(configs) { + return true + } + // pure SLL mode parsing + if mode == PredictionModeSLL { + // Don't bother with combining configs from different semantic + // contexts if we can fail over to full LL costs more time + // since we'll often fail over anyway. + if configs.HasSemanticContext() { + // dup configs, tossing out semantic predicates + dup := NewBaseATNConfigSet(false) + for _, c := range configs.GetItems() { + + // NewBaseATNConfig({semanticContext:}, c) + c = NewBaseATNConfig2(c, SemanticContextNone) + dup.Add(c, nil) + } + configs = dup + } + // now we have combined contexts for configs with dissimilar preds + } + // pure SLL or combined SLL+LL mode parsing + altsets := PredictionModegetConflictingAltSubsets(configs) + return PredictionModehasConflictingAltSet(altsets) && !PredictionModehasStateAssociatedWithOneAlt(configs) +} + +// Checks if any configuration in {@code configs} is in a +// {@link RuleStopState}. Configurations meeting this condition have reached +// the end of the decision rule (local context) or end of start rule (full +// context). +// +// @param configs the configuration set to test +// @return {@code true} if any configuration in {@code configs} is in a +// {@link RuleStopState}, otherwise {@code false} +func PredictionModehasConfigInRuleStopState(configs ATNConfigSet) bool { + for _, c := range configs.GetItems() { + if _, ok := c.GetState().(*RuleStopState); ok { + return true + } + } + return false +} + +// Checks if all configurations in {@code configs} are in a +// {@link RuleStopState}. Configurations meeting this condition have reached +// the end of the decision rule (local context) or end of start rule (full +// context). +// +// @param configs the configuration set to test +// @return {@code true} if all configurations in {@code configs} are in a +// {@link RuleStopState}, otherwise {@code false} +func PredictionModeallConfigsInRuleStopStates(configs ATNConfigSet) bool { + + for _, c := range configs.GetItems() { + if _, ok := c.GetState().(*RuleStopState); !ok { + return false + } + } + return true +} + +// Full LL prediction termination. +// +//

Can we stop looking ahead during ATN simulation or is there some +// uncertainty as to which alternative we will ultimately pick, after +// consuming more input? Even if there are partial conflicts, we might know +// that everything is going to resolve to the same minimum alternative. That +// means we can stop since no more lookahead will change that fact. On the +// other hand, there might be multiple conflicts that resolve to different +// minimums. That means we need more look ahead to decide which of those +// alternatives we should predict.

+// +//

The basic idea is to split the set of configurations {@code C}, into +// conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with +// non-conflicting configurations. Two configurations conflict if they have +// identical {@link ATNConfig//state} and {@link ATNConfig//context} values +// but different {@link ATNConfig//alt} value, e.g. {@code (s, i, ctx, _)} +// and {@code (s, j, ctx, _)} for {@code i!=j}.

+// +//

Reduce these configuration subsets to the set of possible alternatives. +// You can compute the alternative subsets in one pass as follows:

+// +//

{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in +// {@code C} holding {@code s} and {@code ctx} fixed.

+// +//

Or in pseudo-code, for each configuration {@code c} in {@code C}:

+// +//
+// map[c] U= c.{@link ATNConfig//alt alt} // map hash/equals uses s and x, not
+// alt and not pred
+// 
+// +//

The values in {@code map} are the set of {@code A_s,ctx} sets.

+// +//

If {@code |A_s,ctx|=1} then there is no conflict associated with +// {@code s} and {@code ctx}.

+// +//

Reduce the subsets to singletons by choosing a minimum of each subset. If +// the union of these alternative subsets is a singleton, then no amount of +// more lookahead will help us. We will always pick that alternative. If, +// however, there is more than one alternative, then we are uncertain which +// alternative to predict and must continue looking for resolution. We may +// or may not discover an ambiguity in the future, even if there are no +// conflicting subsets this round.

+// +//

The biggest sin is to terminate early because it means we've made a +// decision but were uncertain as to the eventual outcome. We haven't used +// enough lookahead. On the other hand, announcing a conflict too late is no +// big deal you will still have the conflict. It's just inefficient. It +// might even look until the end of file.

+// +//

No special consideration for semantic predicates is required because +// predicates are evaluated on-the-fly for full LL prediction, ensuring that +// no configuration contains a semantic context during the termination +// check.

+// +//

CONFLICTING CONFIGS

+// +//

Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict +// when {@code i!=j} but {@code x=x'}. Because we merge all +// {@code (s, i, _)} configurations together, that means that there are at +// most {@code n} configurations associated with state {@code s} for +// {@code n} possible alternatives in the decision. The merged stacks +// complicate the comparison of configuration contexts {@code x} and +// {@code x'}. Sam checks to see if one is a subset of the other by calling +// merge and checking to see if the merged result is either {@code x} or +// {@code x'}. If the {@code x} associated with lowest alternative {@code i} +// is the superset, then {@code i} is the only possible prediction since the +// others resolve to {@code min(i)} as well. However, if {@code x} is +// associated with {@code j>i} then at least one stack configuration for +// {@code j} is not in conflict with alternative {@code i}. The algorithm +// should keep going, looking for more lookahead due to the uncertainty.

+// +//

For simplicity, I'm doing a equality check between {@code x} and +// {@code x'} that lets the algorithm continue to consume lookahead longer +// than necessary. The reason I like the equality is of course the +// simplicity but also because that is the test you need to detect the +// alternatives that are actually in conflict.

+// +//

CONTINUE/STOP RULE

+// +//

Continue if union of resolved alternative sets from non-conflicting and +// conflicting alternative subsets has more than one alternative. We are +// uncertain about which alternative to predict.

+// +//

The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which +// alternatives are still in the running for the amount of input we've +// consumed at this point. The conflicting sets let us to strip away +// configurations that won't lead to more states because we resolve +// conflicts to the configuration with a minimum alternate for the +// conflicting set.

+// +//

CASES

+// +//
    +// +//
  • no conflicts and more than 1 alternative in set => continue
  • +// +//
  • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s, 3, z)}, +// {@code (s', 1, y)}, {@code (s', 2, y)} yields non-conflicting set +// {@code {3}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = +// {@code {1,3}} => continue +//
  • +// +//
  • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, +// {@code (s', 2, y)}, {@code (s”, 1, z)} yields non-conflicting set +// {@code {1}} U conflicting sets {@code min({1,2})} U {@code min({1,2})} = +// {@code {1}} => stop and predict 1
  • +// +//
  • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 1, y)}, +// {@code (s', 2, y)} yields conflicting, reduced sets {@code {1}} U +// {@code {1}} = {@code {1}} => stop and predict 1, can announce +// ambiguity {@code {1,2}}
  • +// +//
  • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 2, y)}, +// {@code (s', 3, y)} yields conflicting, reduced sets {@code {1}} U +// {@code {2}} = {@code {1,2}} => continue
  • +// +//
  • {@code (s, 1, x)}, {@code (s, 2, x)}, {@code (s', 3, y)}, +// {@code (s', 4, y)} yields conflicting, reduced sets {@code {1}} U +// {@code {3}} = {@code {1,3}} => continue
  • +// +//
+// +//

EXACT AMBIGUITY DETECTION

+// +//

If all states Report the same conflicting set of alternatives, then we +// know we have the exact ambiguity set.

+// +//

|A_i|>1 and +// A_i = A_j for all i, j.

+// +//

In other words, we continue examining lookahead until all {@code A_i} +// have more than one alternative and all {@code A_i} are the same. If +// {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate +// because the resolved set is {@code {1}}. To determine what the real +// ambiguity is, we have to know whether the ambiguity is between one and +// two or one and three so we keep going. We can only stop prediction when +// we need exact ambiguity detection when the sets look like +// {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...

+func PredictionModeresolvesToJustOneViableAlt(altsets []*BitSet) int { + return PredictionModegetSingleViableAlt(altsets) +} + +// Determines if every alternative subset in {@code altsets} contains more +// than one alternative. +// +// @param altsets a collection of alternative subsets +// @return {@code true} if every {@link BitSet} in {@code altsets} has +// {@link BitSet//cardinality cardinality} > 1, otherwise {@code false} +func PredictionModeallSubsetsConflict(altsets []*BitSet) bool { + return !PredictionModehasNonConflictingAltSet(altsets) +} + +// Determines if any single alternative subset in {@code altsets} contains +// exactly one alternative. +// +// @param altsets a collection of alternative subsets +// @return {@code true} if {@code altsets} contains a {@link BitSet} with +// {@link BitSet//cardinality cardinality} 1, otherwise {@code false} +func PredictionModehasNonConflictingAltSet(altsets []*BitSet) bool { + for i := 0; i < len(altsets); i++ { + alts := altsets[i] + if alts.length() == 1 { + return true + } + } + return false +} + +// Determines if any single alternative subset in {@code altsets} contains +// more than one alternative. +// +// @param altsets a collection of alternative subsets +// @return {@code true} if {@code altsets} contains a {@link BitSet} with +// {@link BitSet//cardinality cardinality} > 1, otherwise {@code false} +func PredictionModehasConflictingAltSet(altsets []*BitSet) bool { + for i := 0; i < len(altsets); i++ { + alts := altsets[i] + if alts.length() > 1 { + return true + } + } + return false +} + +// Determines if every alternative subset in {@code altsets} is equivalent. +// +// @param altsets a collection of alternative subsets +// @return {@code true} if every member of {@code altsets} is equal to the +// others, otherwise {@code false} +func PredictionModeallSubsetsEqual(altsets []*BitSet) bool { + var first *BitSet + + for i := 0; i < len(altsets); i++ { + alts := altsets[i] + if first == nil { + first = alts + } else if alts != first { + return false + } + } + + return true +} + +// Returns the unique alternative predicted by all alternative subsets in +// {@code altsets}. If no such alternative exists, this method returns +// {@link ATN//INVALID_ALT_NUMBER}. +// +// @param altsets a collection of alternative subsets +func PredictionModegetUniqueAlt(altsets []*BitSet) int { + all := PredictionModeGetAlts(altsets) + if all.length() == 1 { + return all.minValue() + } + + return ATNInvalidAltNumber +} + +// Gets the complete set of represented alternatives for a collection of +// alternative subsets. This method returns the union of each {@link BitSet} +// in {@code altsets}. +// +// @param altsets a collection of alternative subsets +// @return the set of represented alternatives in {@code altsets} +func PredictionModeGetAlts(altsets []*BitSet) *BitSet { + all := NewBitSet() + for _, alts := range altsets { + all.or(alts) + } + return all +} + +// PredictionModegetConflictingAltSubsets gets the conflicting alt subsets from a configuration set. +// For each configuration {@code c} in {@code configs}: +// +//
+// map[c] U= c.{@link ATNConfig//alt alt} // map hash/equals uses s and x, not
+// alt and not pred
+// 
+func PredictionModegetConflictingAltSubsets(configs ATNConfigSet) []*BitSet { + configToAlts := NewJMap[ATNConfig, *BitSet, *ATNAltConfigComparator[ATNConfig]](&ATNAltConfigComparator[ATNConfig]{}) + + for _, c := range configs.GetItems() { + + alts, ok := configToAlts.Get(c) + if !ok { + alts = NewBitSet() + configToAlts.Put(c, alts) + } + alts.add(c.GetAlt()) + } + + return configToAlts.Values() +} + +// PredictionModeGetStateToAltMap gets a map from state to alt subset from a configuration set. For each +// configuration {@code c} in {@code configs}: +// +//
+// map[c.{@link ATNConfig//state state}] U= c.{@link ATNConfig//alt alt}
+// 
+func PredictionModeGetStateToAltMap(configs ATNConfigSet) *AltDict { + m := NewAltDict() + + for _, c := range configs.GetItems() { + alts := m.Get(c.GetState().String()) + if alts == nil { + alts = NewBitSet() + m.put(c.GetState().String(), alts) + } + alts.(*BitSet).add(c.GetAlt()) + } + return m +} + +func PredictionModehasStateAssociatedWithOneAlt(configs ATNConfigSet) bool { + values := PredictionModeGetStateToAltMap(configs).values() + for i := 0; i < len(values); i++ { + if values[i].(*BitSet).length() == 1 { + return true + } + } + return false +} + +func PredictionModegetSingleViableAlt(altsets []*BitSet) int { + result := ATNInvalidAltNumber + + for i := 0; i < len(altsets); i++ { + alts := altsets[i] + minAlt := alts.minValue() + if result == ATNInvalidAltNumber { + result = minAlt + } else if result != minAlt { // more than 1 viable alt + return ATNInvalidAltNumber + } + } + return result +} diff --git a/runtime/Go/antlr/v4/recognizer.go b/runtime/Go/antlr/v4/recognizer.go new file mode 100644 index 0000000000..63e9b08adb --- /dev/null +++ b/runtime/Go/antlr/v4/recognizer.go @@ -0,0 +1,216 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strings" + + "strconv" +) + +type Recognizer interface { + GetLiteralNames() []string + GetSymbolicNames() []string + GetRuleNames() []string + + Sempred(RuleContext, int, int) bool + Precpred(RuleContext, int) bool + + GetState() int + SetState(int) + Action(RuleContext, int, int) + AddErrorListener(ErrorListener) + RemoveErrorListeners() + GetATN() *ATN + GetErrorListenerDispatch() ErrorListener +} + +type BaseRecognizer struct { + listeners []ErrorListener + state int + + RuleNames []string + LiteralNames []string + SymbolicNames []string + GrammarFileName string +} + +func NewBaseRecognizer() *BaseRecognizer { + rec := new(BaseRecognizer) + rec.listeners = []ErrorListener{ConsoleErrorListenerINSTANCE} + rec.state = -1 + return rec +} + +var tokenTypeMapCache = make(map[string]int) +var ruleIndexMapCache = make(map[string]int) + +func (b *BaseRecognizer) checkVersion(toolVersion string) { + runtimeVersion := "4.10.1" + if runtimeVersion != toolVersion { + fmt.Println("ANTLR runtime and generated code versions disagree: " + runtimeVersion + "!=" + toolVersion) + } +} + +func (b *BaseRecognizer) Action(context RuleContext, ruleIndex, actionIndex int) { + panic("action not implemented on Recognizer!") +} + +func (b *BaseRecognizer) AddErrorListener(listener ErrorListener) { + b.listeners = append(b.listeners, listener) +} + +func (b *BaseRecognizer) RemoveErrorListeners() { + b.listeners = make([]ErrorListener, 0) +} + +func (b *BaseRecognizer) GetRuleNames() []string { + return b.RuleNames +} + +func (b *BaseRecognizer) GetTokenNames() []string { + return b.LiteralNames +} + +func (b *BaseRecognizer) GetSymbolicNames() []string { + return b.SymbolicNames +} + +func (b *BaseRecognizer) GetLiteralNames() []string { + return b.LiteralNames +} + +func (b *BaseRecognizer) GetState() int { + return b.state +} + +func (b *BaseRecognizer) SetState(v int) { + b.state = v +} + +//func (b *Recognizer) GetTokenTypeMap() { +// var tokenNames = b.GetTokenNames() +// if (tokenNames==nil) { +// panic("The current recognizer does not provide a list of token names.") +// } +// var result = tokenTypeMapCache[tokenNames] +// if(result==nil) { +// result = tokenNames.reduce(function(o, k, i) { o[k] = i }) +// result.EOF = TokenEOF +// tokenTypeMapCache[tokenNames] = result +// } +// return result +//} + +// Get a map from rule names to rule indexes. +// +//

Used for XPath and tree pattern compilation.

+func (b *BaseRecognizer) GetRuleIndexMap() map[string]int { + + panic("Method not defined!") + // var ruleNames = b.GetRuleNames() + // if (ruleNames==nil) { + // panic("The current recognizer does not provide a list of rule names.") + // } + // + // var result = ruleIndexMapCache[ruleNames] + // if(result==nil) { + // result = ruleNames.reduce(function(o, k, i) { o[k] = i }) + // ruleIndexMapCache[ruleNames] = result + // } + // return result +} + +func (b *BaseRecognizer) GetTokenType(tokenName string) int { + panic("Method not defined!") + // var ttype = b.GetTokenTypeMap()[tokenName] + // if (ttype !=nil) { + // return ttype + // } else { + // return TokenInvalidType + // } +} + +//func (b *Recognizer) GetTokenTypeMap() map[string]int { +// Vocabulary vocabulary = getVocabulary() +// +// Synchronized (tokenTypeMapCache) { +// Map result = tokenTypeMapCache.Get(vocabulary) +// if (result == null) { +// result = new HashMap() +// for (int i = 0; i < GetATN().maxTokenType; i++) { +// String literalName = vocabulary.getLiteralName(i) +// if (literalName != null) { +// result.put(literalName, i) +// } +// +// String symbolicName = vocabulary.GetSymbolicName(i) +// if (symbolicName != null) { +// result.put(symbolicName, i) +// } +// } +// +// result.put("EOF", Token.EOF) +// result = Collections.unmodifiableMap(result) +// tokenTypeMapCache.put(vocabulary, result) +// } +// +// return result +// } +//} + +// What is the error header, normally line/character position information?// +func (b *BaseRecognizer) GetErrorHeader(e RecognitionException) string { + line := e.GetOffendingToken().GetLine() + column := e.GetOffendingToken().GetColumn() + return "line " + strconv.Itoa(line) + ":" + strconv.Itoa(column) +} + +// How should a token be displayed in an error message? The default +// +// is to display just the text, but during development you might +// want to have a lot of information spit out. Override in that case +// to use t.String() (which, for CommonToken, dumps everything about +// the token). This is better than forcing you to override a method in +// your token objects because you don't have to go modify your lexer +// so that it creates a NewJava type. +// +// @deprecated This method is not called by the ANTLR 4 Runtime. Specific +// implementations of {@link ANTLRErrorStrategy} may provide a similar +// feature when necessary. For example, see +// {@link DefaultErrorStrategy//GetTokenErrorDisplay}. +func (b *BaseRecognizer) GetTokenErrorDisplay(t Token) string { + if t == nil { + return "" + } + s := t.GetText() + if s == "" { + if t.GetTokenType() == TokenEOF { + s = "" + } else { + s = "<" + strconv.Itoa(t.GetTokenType()) + ">" + } + } + s = strings.Replace(s, "\t", "\\t", -1) + s = strings.Replace(s, "\n", "\\n", -1) + s = strings.Replace(s, "\r", "\\r", -1) + + return "'" + s + "'" +} + +func (b *BaseRecognizer) GetErrorListenerDispatch() ErrorListener { + return NewProxyErrorListener(b.listeners) +} + +// subclass needs to override these if there are sempreds or actions +// that the ATN interp needs to execute +func (b *BaseRecognizer) Sempred(localctx RuleContext, ruleIndex int, actionIndex int) bool { + return true +} + +func (b *BaseRecognizer) Precpred(localctx RuleContext, precedence int) bool { + return true +} diff --git a/runtime/Go/antlr/v4/rule_context.go b/runtime/Go/antlr/v4/rule_context.go new file mode 100644 index 0000000000..210699ba23 --- /dev/null +++ b/runtime/Go/antlr/v4/rule_context.go @@ -0,0 +1,114 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +// A rule context is a record of a single rule invocation. It knows +// which context invoked it, if any. If there is no parent context, then +// naturally the invoking state is not valid. The parent link +// provides a chain upwards from the current rule invocation to the root +// of the invocation tree, forming a stack. We actually carry no +// information about the rule associated with b context (except +// when parsing). We keep only the state number of the invoking state from +// the ATN submachine that invoked b. Contrast b with the s +// pointer inside ParserRuleContext that tracks the current state +// being "executed" for the current rule. +// +// The parent contexts are useful for computing lookahead sets and +// getting error information. +// +// These objects are used during parsing and prediction. +// For the special case of parsers, we use the subclass +// ParserRuleContext. +// +// @see ParserRuleContext +// + +type RuleContext interface { + RuleNode + + GetInvokingState() int + SetInvokingState(int) + + GetRuleIndex() int + IsEmpty() bool + + GetAltNumber() int + SetAltNumber(altNumber int) + + String([]string, RuleContext) string +} + +type BaseRuleContext struct { + parentCtx RuleContext + invokingState int + RuleIndex int +} + +func NewBaseRuleContext(parent RuleContext, invokingState int) *BaseRuleContext { + + rn := new(BaseRuleContext) + + // What context invoked b rule? + rn.parentCtx = parent + + // What state invoked the rule associated with b context? + // The "return address" is the followState of invokingState + // If parent is nil, b should be -1. + if parent == nil { + rn.invokingState = -1 + } else { + rn.invokingState = invokingState + } + + return rn +} + +func (b *BaseRuleContext) GetBaseRuleContext() *BaseRuleContext { + return b +} + +func (b *BaseRuleContext) SetParent(v Tree) { + if v == nil { + b.parentCtx = nil + } else { + b.parentCtx = v.(RuleContext) + } +} + +func (b *BaseRuleContext) GetInvokingState() int { + return b.invokingState +} + +func (b *BaseRuleContext) SetInvokingState(t int) { + b.invokingState = t +} + +func (b *BaseRuleContext) GetRuleIndex() int { + return b.RuleIndex +} + +func (b *BaseRuleContext) GetAltNumber() int { + return ATNInvalidAltNumber +} + +func (b *BaseRuleContext) SetAltNumber(altNumber int) {} + +// A context is empty if there is no invoking state meaning nobody call +// current context. +func (b *BaseRuleContext) IsEmpty() bool { + return b.invokingState == -1 +} + +// Return the combined text of all child nodes. This method only considers +// tokens which have been added to the parse tree. +//

+// Since tokens on hidden channels (e.g. whitespace or comments) are not +// added to the parse trees, they will not appear in the output of b +// method. +// + +func (b *BaseRuleContext) GetParent() Tree { + return b.parentCtx +} diff --git a/runtime/Go/antlr/v4/semantic_context.go b/runtime/Go/antlr/v4/semantic_context.go new file mode 100644 index 0000000000..f54926e760 --- /dev/null +++ b/runtime/Go/antlr/v4/semantic_context.go @@ -0,0 +1,469 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" +) + +// A tree structure used to record the semantic context in which +// an ATN configuration is valid. It's either a single predicate, +// a conjunction {@code p1&&p2}, or a sum of products {@code p1||p2}. +// +//

I have scoped the {@link AND}, {@link OR}, and {@link Predicate} subclasses of +// {@link SemanticContext} within the scope of this outer class.

+// + +type SemanticContext interface { + Equals(other Collectable[SemanticContext]) bool + Hash() int + + evaluate(parser Recognizer, outerContext RuleContext) bool + evalPrecedence(parser Recognizer, outerContext RuleContext) SemanticContext + + String() string +} + +func SemanticContextandContext(a, b SemanticContext) SemanticContext { + if a == nil || a == SemanticContextNone { + return b + } + if b == nil || b == SemanticContextNone { + return a + } + result := NewAND(a, b) + if len(result.opnds) == 1 { + return result.opnds[0] + } + + return result +} + +func SemanticContextorContext(a, b SemanticContext) SemanticContext { + if a == nil { + return b + } + if b == nil { + return a + } + if a == SemanticContextNone || b == SemanticContextNone { + return SemanticContextNone + } + result := NewOR(a, b) + if len(result.opnds) == 1 { + return result.opnds[0] + } + + return result +} + +type Predicate struct { + ruleIndex int + predIndex int + isCtxDependent bool +} + +func NewPredicate(ruleIndex, predIndex int, isCtxDependent bool) *Predicate { + p := new(Predicate) + + p.ruleIndex = ruleIndex + p.predIndex = predIndex + p.isCtxDependent = isCtxDependent // e.g., $i ref in pred + return p +} + +//The default {@link SemanticContext}, which is semantically equivalent to +//a predicate of the form {@code {true}?}. + +var SemanticContextNone = NewPredicate(-1, -1, false) + +func (p *Predicate) evalPrecedence(parser Recognizer, outerContext RuleContext) SemanticContext { + return p +} + +func (p *Predicate) evaluate(parser Recognizer, outerContext RuleContext) bool { + + var localctx RuleContext + + if p.isCtxDependent { + localctx = outerContext + } + + return parser.Sempred(localctx, p.ruleIndex, p.predIndex) +} + +func (p *Predicate) Equals(other Collectable[SemanticContext]) bool { + if p == other { + return true + } else if _, ok := other.(*Predicate); !ok { + return false + } else { + return p.ruleIndex == other.(*Predicate).ruleIndex && + p.predIndex == other.(*Predicate).predIndex && + p.isCtxDependent == other.(*Predicate).isCtxDependent + } +} + +func (p *Predicate) Hash() int { + h := murmurInit(0) + h = murmurUpdate(h, p.ruleIndex) + h = murmurUpdate(h, p.predIndex) + if p.isCtxDependent { + h = murmurUpdate(h, 1) + } else { + h = murmurUpdate(h, 0) + } + return murmurFinish(h, 3) +} + +func (p *Predicate) String() string { + return "{" + strconv.Itoa(p.ruleIndex) + ":" + strconv.Itoa(p.predIndex) + "}?" +} + +type PrecedencePredicate struct { + precedence int +} + +func NewPrecedencePredicate(precedence int) *PrecedencePredicate { + + p := new(PrecedencePredicate) + p.precedence = precedence + + return p +} + +func (p *PrecedencePredicate) evaluate(parser Recognizer, outerContext RuleContext) bool { + return parser.Precpred(outerContext, p.precedence) +} + +func (p *PrecedencePredicate) evalPrecedence(parser Recognizer, outerContext RuleContext) SemanticContext { + if parser.Precpred(outerContext, p.precedence) { + return SemanticContextNone + } + + return nil +} + +func (p *PrecedencePredicate) compareTo(other *PrecedencePredicate) int { + return p.precedence - other.precedence +} + +func (p *PrecedencePredicate) Equals(other Collectable[SemanticContext]) bool { + + var op *PrecedencePredicate + var ok bool + if op, ok = other.(*PrecedencePredicate); !ok { + return false + } + + if p == op { + return true + } + + return p.precedence == other.(*PrecedencePredicate).precedence +} + +func (p *PrecedencePredicate) Hash() int { + h := uint32(1) + h = 31*h + uint32(p.precedence) + return int(h) +} + +func (p *PrecedencePredicate) String() string { + return "{" + strconv.Itoa(p.precedence) + ">=prec}?" +} + +func PrecedencePredicatefilterPrecedencePredicates(set *JStore[SemanticContext, Comparator[SemanticContext]]) []*PrecedencePredicate { + result := make([]*PrecedencePredicate, 0) + + set.Each(func(v SemanticContext) bool { + if c2, ok := v.(*PrecedencePredicate); ok { + result = append(result, c2) + } + return true + }) + + return result +} + +// A semantic context which is true whenever none of the contained contexts +// is false.` + +type AND struct { + opnds []SemanticContext +} + +func NewAND(a, b SemanticContext) *AND { + + operands := NewJStore[SemanticContext, Comparator[SemanticContext]](&ObjEqComparator[SemanticContext]{}) + if aa, ok := a.(*AND); ok { + for _, o := range aa.opnds { + operands.Put(o) + } + } else { + operands.Put(a) + } + + if ba, ok := b.(*AND); ok { + for _, o := range ba.opnds { + operands.Put(o) + } + } else { + operands.Put(b) + } + precedencePredicates := PrecedencePredicatefilterPrecedencePredicates(operands) + if len(precedencePredicates) > 0 { + // interested in the transition with the lowest precedence + var reduced *PrecedencePredicate + + for _, p := range precedencePredicates { + if reduced == nil || p.precedence < reduced.precedence { + reduced = p + } + } + + operands.Put(reduced) + } + + vs := operands.Values() + opnds := make([]SemanticContext, len(vs)) + for i, v := range vs { + opnds[i] = v.(SemanticContext) + } + + and := new(AND) + and.opnds = opnds + + return and +} + +func (a *AND) Equals(other Collectable[SemanticContext]) bool { + if a == other { + return true + } + if _, ok := other.(*AND); !ok { + return false + } else { + for i, v := range other.(*AND).opnds { + if !a.opnds[i].Equals(v) { + return false + } + } + return true + } +} + +// {@inheritDoc} +// +//

+// The evaluation of predicates by a context is short-circuiting, but +// unordered.

+func (a *AND) evaluate(parser Recognizer, outerContext RuleContext) bool { + for i := 0; i < len(a.opnds); i++ { + if !a.opnds[i].evaluate(parser, outerContext) { + return false + } + } + return true +} + +func (a *AND) evalPrecedence(parser Recognizer, outerContext RuleContext) SemanticContext { + differs := false + operands := make([]SemanticContext, 0) + + for i := 0; i < len(a.opnds); i++ { + context := a.opnds[i] + evaluated := context.evalPrecedence(parser, outerContext) + differs = differs || (evaluated != context) + if evaluated == nil { + // The AND context is false if any element is false + return nil + } else if evaluated != SemanticContextNone { + // Reduce the result by Skipping true elements + operands = append(operands, evaluated) + } + } + if !differs { + return a + } + + if len(operands) == 0 { + // all elements were true, so the AND context is true + return SemanticContextNone + } + + var result SemanticContext + + for _, o := range operands { + if result == nil { + result = o + } else { + result = SemanticContextandContext(result, o) + } + } + + return result +} + +func (a *AND) Hash() int { + h := murmurInit(37) // Init with a value different from OR + for _, op := range a.opnds { + h = murmurUpdate(h, op.Hash()) + } + return murmurFinish(h, len(a.opnds)) +} + +func (a *OR) Hash() int { + h := murmurInit(41) // Init with a value different from AND + for _, op := range a.opnds { + h = murmurUpdate(h, op.Hash()) + } + return murmurFinish(h, len(a.opnds)) +} + +func (a *AND) String() string { + s := "" + + for _, o := range a.opnds { + s += "&& " + fmt.Sprint(o) + } + + if len(s) > 3 { + return s[0:3] + } + + return s +} + +// +// A semantic context which is true whenever at least one of the contained +// contexts is true. +// + +type OR struct { + opnds []SemanticContext +} + +func NewOR(a, b SemanticContext) *OR { + + operands := NewJStore[SemanticContext, Comparator[SemanticContext]](&ObjEqComparator[SemanticContext]{}) + if aa, ok := a.(*OR); ok { + for _, o := range aa.opnds { + operands.Put(o) + } + } else { + operands.Put(a) + } + + if ba, ok := b.(*OR); ok { + for _, o := range ba.opnds { + operands.Put(o) + } + } else { + operands.Put(b) + } + precedencePredicates := PrecedencePredicatefilterPrecedencePredicates(operands) + if len(precedencePredicates) > 0 { + // interested in the transition with the lowest precedence + var reduced *PrecedencePredicate + + for _, p := range precedencePredicates { + if reduced == nil || p.precedence > reduced.precedence { + reduced = p + } + } + + operands.Put(reduced) + } + + vs := operands.Values() + + opnds := make([]SemanticContext, len(vs)) + for i, v := range vs { + opnds[i] = v.(SemanticContext) + } + + o := new(OR) + o.opnds = opnds + + return o +} + +func (o *OR) Equals(other Collectable[SemanticContext]) bool { + if o == other { + return true + } else if _, ok := other.(*OR); !ok { + return false + } else { + for i, v := range other.(*OR).opnds { + if !o.opnds[i].Equals(v) { + return false + } + } + return true + } +} + +//

+// The evaluation of predicates by o context is short-circuiting, but +// unordered.

+func (o *OR) evaluate(parser Recognizer, outerContext RuleContext) bool { + for i := 0; i < len(o.opnds); i++ { + if o.opnds[i].evaluate(parser, outerContext) { + return true + } + } + return false +} + +func (o *OR) evalPrecedence(parser Recognizer, outerContext RuleContext) SemanticContext { + differs := false + operands := make([]SemanticContext, 0) + for i := 0; i < len(o.opnds); i++ { + context := o.opnds[i] + evaluated := context.evalPrecedence(parser, outerContext) + differs = differs || (evaluated != context) + if evaluated == SemanticContextNone { + // The OR context is true if any element is true + return SemanticContextNone + } else if evaluated != nil { + // Reduce the result by Skipping false elements + operands = append(operands, evaluated) + } + } + if !differs { + return o + } + if len(operands) == 0 { + // all elements were false, so the OR context is false + return nil + } + var result SemanticContext + + for _, o := range operands { + if result == nil { + result = o + } else { + result = SemanticContextorContext(result, o) + } + } + + return result +} + +func (o *OR) String() string { + s := "" + + for _, o := range o.opnds { + s += "|| " + fmt.Sprint(o) + } + + if len(s) > 3 { + return s[0:3] + } + + return s +} diff --git a/runtime/Go/antlr/v4/testing_assert_test.go b/runtime/Go/antlr/v4/testing_assert_test.go new file mode 100644 index 0000000000..4a402a34f3 --- /dev/null +++ b/runtime/Go/antlr/v4/testing_assert_test.go @@ -0,0 +1,98 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +// These assert functions are borrowed from https://github.com/stretchr/testify/ (MIT License) + +package antlr + +import ( + "fmt" + "reflect" + "testing" +) + +type assert struct { + t *testing.T +} + +func assertNew(t *testing.T) *assert { + return &assert{ + t: t, + } +} + +func (a *assert) Equal(expected, actual interface{}) bool { + if !objectsAreEqual(expected, actual) { + return a.Fail(fmt.Sprintf("Not equal:\n"+ + "expected: %#v\n"+ + " actual: %#v\n", expected, actual)) + } + return true +} + +func objectsAreEqual(expected, actual interface{}) bool { + if expected == nil || actual == nil { + return expected == actual + } + return reflect.DeepEqual(expected, actual) +} + +func (a *assert) Nil(object interface{}) bool { + if isNil(object) { + return true + } + return a.Fail(fmt.Sprintf("Expected nil, but got: %#v", object)) +} + +func (a *assert) NotNil(object interface{}) bool { + if !isNil(object) { + return true + } + return a.Fail("Expected value not to be nil.") +} + +// isNil checks if a specified object is nil or not, without Failing. +func isNil(object interface{}) bool { + if object == nil { + return true + } + + value := reflect.ValueOf(object) + kind := value.Kind() + if kind >= reflect.Chan && kind <= reflect.Slice && value.IsNil() { + return true + } + + return false +} + +func (a *assert) Panics(f func()) bool { + if funcDidPanic, panicValue := didPanic(f); !funcDidPanic { + return a.Fail(fmt.Sprintf("func %p should panic\n\r\tPanic value:\t%v", f, panicValue)) + } + + return true +} + +// Fail reports a failure through +func (a *assert) Fail(failureMessage string) bool { + a.t.Errorf("%s", failureMessage) + return false +} + +// didPanic returns true if the function passed to it panics. Otherwise, it returns false. +func didPanic(f func()) (bool, interface{}) { + didPanic := false + var message interface{} + func() { + defer func() { + if message = recover(); message != nil { + didPanic = true + } + }() + // call the target function + f() + }() + return didPanic, message +} diff --git a/runtime/Go/antlr/v4/testing_lexer_b_test.go b/runtime/Go/antlr/v4/testing_lexer_b_test.go new file mode 100644 index 0000000000..2485abf780 --- /dev/null +++ b/runtime/Go/antlr/v4/testing_lexer_b_test.go @@ -0,0 +1,137 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +/* +LexerB is a lexer for testing purpose. + +This file is generated from this grammer. + +lexer grammar LexerB; + +ID : 'a'..'z'+; +INT : '0'..'9'+; +SEMI : ';'; +ASSIGN : '='; +PLUS : '+'; +MULT : '*'; +WS : ' '+; +*/ + +import ( + "fmt" + "sync" + "unicode" +) + +// Suppress unused import error +var _ = fmt.Printf +var _ = sync.Once{} +var _ = unicode.IsLetter + +type LexerB struct { + *BaseLexer + channelNames []string + modeNames []string + // TODO: EOF string +} + +var lexerbLexerStaticData struct { + once sync.Once + serializedATN []int32 + channelNames []string + modeNames []string + literalNames []string + symbolicNames []string + ruleNames []string + predictionContextCache *PredictionContextCache + atn *ATN + decisionToDFA []*DFA +} + +func lexerbLexerInit() { + staticData := &lexerbLexerStaticData + staticData.channelNames = []string{ + "DEFAULT_TOKEN_CHANNEL", "HIDDEN", + } + staticData.modeNames = []string{ + "DEFAULT_MODE", + } + staticData.literalNames = []string{ + "", "", "", "';'", "'='", "'+'", "'*'", + } + staticData.symbolicNames = []string{ + "", "ID", "INT", "SEMI", "ASSIGN", "PLUS", "MULT", "WS", + } + staticData.ruleNames = []string{ + "ID", "INT", "SEMI", "ASSIGN", "PLUS", "MULT", "WS", + } + staticData.predictionContextCache = NewPredictionContextCache() + staticData.serializedATN = []int32{ + 4, 0, 7, 38, 6, -1, 2, 0, 7, 0, 2, 1, 7, 1, 2, 2, 7, 2, 2, 3, 7, 3, 2, + 4, 7, 4, 2, 5, 7, 5, 2, 6, 7, 6, 1, 0, 4, 0, 17, 8, 0, 11, 0, 12, 0, 18, + 1, 1, 4, 1, 22, 8, 1, 11, 1, 12, 1, 23, 1, 2, 1, 2, 1, 3, 1, 3, 1, 4, 1, + 4, 1, 5, 1, 5, 1, 6, 4, 6, 35, 8, 6, 11, 6, 12, 6, 36, 0, 0, 7, 1, 1, 3, + 2, 5, 3, 7, 4, 9, 5, 11, 6, 13, 7, 1, 0, 0, 40, 0, 1, 1, 0, 0, 0, 0, 3, + 1, 0, 0, 0, 0, 5, 1, 0, 0, 0, 0, 7, 1, 0, 0, 0, 0, 9, 1, 0, 0, 0, 0, 11, + 1, 0, 0, 0, 0, 13, 1, 0, 0, 0, 1, 16, 1, 0, 0, 0, 3, 21, 1, 0, 0, 0, 5, + 25, 1, 0, 0, 0, 7, 27, 1, 0, 0, 0, 9, 29, 1, 0, 0, 0, 11, 31, 1, 0, 0, + 0, 13, 34, 1, 0, 0, 0, 15, 17, 2, 97, 122, 0, 16, 15, 1, 0, 0, 0, 17, 18, + 1, 0, 0, 0, 18, 16, 1, 0, 0, 0, 18, 19, 1, 0, 0, 0, 19, 2, 1, 0, 0, 0, + 20, 22, 2, 48, 57, 0, 21, 20, 1, 0, 0, 0, 22, 23, 1, 0, 0, 0, 23, 21, 1, + 0, 0, 0, 23, 24, 1, 0, 0, 0, 24, 4, 1, 0, 0, 0, 25, 26, 5, 59, 0, 0, 26, + 6, 1, 0, 0, 0, 27, 28, 5, 61, 0, 0, 28, 8, 1, 0, 0, 0, 29, 30, 5, 43, 0, + 0, 30, 10, 1, 0, 0, 0, 31, 32, 5, 42, 0, 0, 32, 12, 1, 0, 0, 0, 33, 35, + 5, 32, 0, 0, 34, 33, 1, 0, 0, 0, 35, 36, 1, 0, 0, 0, 36, 34, 1, 0, 0, 0, + 36, 37, 1, 0, 0, 0, 37, 14, 1, 0, 0, 0, 4, 0, 18, 23, 36, 0, + } + deserializer := NewATNDeserializer(nil) + staticData.atn = deserializer.Deserialize(staticData.serializedATN) + atn := staticData.atn + staticData.decisionToDFA = make([]*DFA, len(atn.DecisionToState)) + decisionToDFA := staticData.decisionToDFA + for index, state := range atn.DecisionToState { + decisionToDFA[index] = NewDFA(state, index) + } +} + +// LexerBInit initializes any static state used to implement LexerB. By default the +// static state used to implement the lexer is lazily initialized during the first call to +// NewLexerB(). You can call this function if you wish to initialize the static state ahead +// of time. +func LexerBInit() { + staticData := &lexerbLexerStaticData + staticData.once.Do(lexerbLexerInit) +} + +// NewLexerB produces a new lexer instance for the optional input antlr.CharStream. +func NewLexerB(input CharStream) *LexerB { + LexerBInit() + l := new(LexerB) + + l.BaseLexer = NewBaseLexer(input) + staticData := &lexerbLexerStaticData + l.Interpreter = NewLexerATNSimulator(l, staticData.atn, staticData.decisionToDFA, staticData.predictionContextCache) + l.channelNames = staticData.channelNames + l.modeNames = staticData.modeNames + l.RuleNames = staticData.ruleNames + l.LiteralNames = staticData.literalNames + l.SymbolicNames = staticData.symbolicNames + l.GrammarFileName = "LexerB.g4" + // TODO: l.EOF = antlr.TokenEOF + + return l +} + +// LexerB tokens. +const ( + LexerBID = 1 + LexerBINT = 2 + LexerBSEMI = 3 + LexerBASSIGN = 4 + LexerBPLUS = 5 + LexerBMULT = 6 + LexerBWS = 7 +) diff --git a/runtime/Go/antlr/v4/testing_util_test.go b/runtime/Go/antlr/v4/testing_util_test.go new file mode 100644 index 0000000000..20428831b3 --- /dev/null +++ b/runtime/Go/antlr/v4/testing_util_test.go @@ -0,0 +1,30 @@ +package antlr + +import ( + "fmt" + "strings" +) + +// newTestCommonToken create common token with tokentype, text and channel +// notice: test purpose only +func newTestCommonToken(tokenType int, text string, channel int) *CommonToken { + t := new(CommonToken) + t.BaseToken = new(BaseToken) + t.tokenType = tokenType + t.channel = channel + t.text = text + t.line = 0 + t.column = -1 + return t +} + +// tokensToString returnes []Tokens string +// notice: test purpose only +func tokensToString(tokens []Token) string { + buf := make([]string, len(tokens)) + for i, token := range tokens { + buf[i] = fmt.Sprintf("%v", token) + } + + return "[" + strings.Join(buf, ", ") + "]" +} diff --git a/runtime/Go/antlr/v4/token.go b/runtime/Go/antlr/v4/token.go new file mode 100644 index 0000000000..f73b06bc6a --- /dev/null +++ b/runtime/Go/antlr/v4/token.go @@ -0,0 +1,209 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "strconv" + "strings" +) + +type TokenSourceCharStreamPair struct { + tokenSource TokenSource + charStream CharStream +} + +// A token has properties: text, type, line, character position in the line +// (so we can ignore tabs), token channel, index, and source from which +// we obtained this token. + +type Token interface { + GetSource() *TokenSourceCharStreamPair + GetTokenType() int + GetChannel() int + GetStart() int + GetStop() int + GetLine() int + GetColumn() int + + GetText() string + SetText(s string) + + GetTokenIndex() int + SetTokenIndex(v int) + + GetTokenSource() TokenSource + GetInputStream() CharStream +} + +type BaseToken struct { + source *TokenSourceCharStreamPair + tokenType int // token type of the token + channel int // The parser ignores everything not on DEFAULT_CHANNEL + start int // optional return -1 if not implemented. + stop int // optional return -1 if not implemented. + tokenIndex int // from 0..n-1 of the token object in the input stream + line int // line=1..n of the 1st character + column int // beginning of the line at which it occurs, 0..n-1 + text string // text of the token. + readOnly bool +} + +const ( + TokenInvalidType = 0 + + // During lookahead operations, this "token" signifies we hit rule end ATN state + // and did not follow it despite needing to. + TokenEpsilon = -2 + + TokenMinUserTokenType = 1 + + TokenEOF = -1 + + // All tokens go to the parser (unless Skip() is called in that rule) + // on a particular "channel". The parser tunes to a particular channel + // so that whitespace etc... can go to the parser on a "hidden" channel. + + TokenDefaultChannel = 0 + + // Anything on different channel than DEFAULT_CHANNEL is not parsed + // by parser. + + TokenHiddenChannel = 1 +) + +func (b *BaseToken) GetChannel() int { + return b.channel +} + +func (b *BaseToken) GetStart() int { + return b.start +} + +func (b *BaseToken) GetStop() int { + return b.stop +} + +func (b *BaseToken) GetLine() int { + return b.line +} + +func (b *BaseToken) GetColumn() int { + return b.column +} + +func (b *BaseToken) GetTokenType() int { + return b.tokenType +} + +func (b *BaseToken) GetSource() *TokenSourceCharStreamPair { + return b.source +} + +func (b *BaseToken) GetTokenIndex() int { + return b.tokenIndex +} + +func (b *BaseToken) SetTokenIndex(v int) { + b.tokenIndex = v +} + +func (b *BaseToken) GetTokenSource() TokenSource { + return b.source.tokenSource +} + +func (b *BaseToken) GetInputStream() CharStream { + return b.source.charStream +} + +type CommonToken struct { + *BaseToken +} + +func NewCommonToken(source *TokenSourceCharStreamPair, tokenType, channel, start, stop int) *CommonToken { + + t := new(CommonToken) + + t.BaseToken = new(BaseToken) + + t.source = source + t.tokenType = tokenType + t.channel = channel + t.start = start + t.stop = stop + t.tokenIndex = -1 + if t.source.tokenSource != nil { + t.line = source.tokenSource.GetLine() + t.column = source.tokenSource.GetCharPositionInLine() + } else { + t.column = -1 + } + return t +} + +// An empty {@link Pair} which is used as the default value of +// {@link //source} for tokens that do not have a source. + +//CommonToken.EMPTY_SOURCE = [ nil, nil ] + +// Constructs a New{@link CommonToken} as a copy of another {@link Token}. +// +//

+// If {@code oldToken} is also a {@link CommonToken} instance, the newly +// constructed token will share a reference to the {@link //text} field and +// the {@link Pair} stored in {@link //source}. Otherwise, {@link //text} will +// be assigned the result of calling {@link //GetText}, and {@link //source} +// will be constructed from the result of {@link Token//GetTokenSource} and +// {@link Token//GetInputStream}.

+// +// @param oldToken The token to copy. +func (c *CommonToken) clone() *CommonToken { + t := NewCommonToken(c.source, c.tokenType, c.channel, c.start, c.stop) + t.tokenIndex = c.GetTokenIndex() + t.line = c.GetLine() + t.column = c.GetColumn() + t.text = c.GetText() + return t +} + +func (c *CommonToken) GetText() string { + if c.text != "" { + return c.text + } + input := c.GetInputStream() + if input == nil { + return "" + } + n := input.Size() + if c.start < n && c.stop < n { + return input.GetTextFromInterval(NewInterval(c.start, c.stop)) + } + return "" +} + +func (c *CommonToken) SetText(text string) { + c.text = text +} + +func (c *CommonToken) String() string { + txt := c.GetText() + if txt != "" { + txt = strings.Replace(txt, "\n", "\\n", -1) + txt = strings.Replace(txt, "\r", "\\r", -1) + txt = strings.Replace(txt, "\t", "\\t", -1) + } else { + txt = "" + } + + var ch string + if c.channel > 0 { + ch = ",channel=" + strconv.Itoa(c.channel) + } else { + ch = "" + } + + return "[@" + strconv.Itoa(c.tokenIndex) + "," + strconv.Itoa(c.start) + ":" + strconv.Itoa(c.stop) + "='" + + txt + "',<" + strconv.Itoa(c.tokenType) + ">" + + ch + "," + strconv.Itoa(c.line) + ":" + strconv.Itoa(c.column) + "]" +} diff --git a/runtime/Go/antlr/v4/token_source.go b/runtime/Go/antlr/v4/token_source.go new file mode 100644 index 0000000000..a3f36eaa67 --- /dev/null +++ b/runtime/Go/antlr/v4/token_source.go @@ -0,0 +1,17 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type TokenSource interface { + NextToken() Token + Skip() + More() + GetLine() int + GetCharPositionInLine() int + GetInputStream() CharStream + GetSourceName() string + setTokenFactory(factory TokenFactory) + GetTokenFactory() TokenFactory +} diff --git a/runtime/Go/antlr/v4/token_stream.go b/runtime/Go/antlr/v4/token_stream.go new file mode 100644 index 0000000000..1527d43f60 --- /dev/null +++ b/runtime/Go/antlr/v4/token_stream.go @@ -0,0 +1,20 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +type TokenStream interface { + IntStream + + LT(k int) Token + + Get(index int) Token + GetTokenSource() TokenSource + SetTokenSource(TokenSource) + + GetAllText() string + GetTextFromInterval(*Interval) string + GetTextFromRuleContext(RuleContext) string + GetTextFromTokens(Token, Token) string +} diff --git a/runtime/Go/antlr/v4/tokenstream_rewriter.go b/runtime/Go/antlr/v4/tokenstream_rewriter.go new file mode 100644 index 0000000000..b3e38af344 --- /dev/null +++ b/runtime/Go/antlr/v4/tokenstream_rewriter.go @@ -0,0 +1,659 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "bytes" + "fmt" +) + +// +// Useful for rewriting out a buffered input token stream after doing some +// augmentation or other manipulations on it. + +//

+// You can insert stuff, replace, and delete chunks. Note that the operations +// are done lazily--only if you convert the buffer to a {@link String} with +// {@link TokenStream#getText()}. This is very efficient because you are not +// moving data around all the time. As the buffer of tokens is converted to +// strings, the {@link #getText()} method(s) scan the input token stream and +// check to see if there is an operation at the current index. If so, the +// operation is done and then normal {@link String} rendering continues on the +// buffer. This is like having multiple Turing machine instruction streams +// (programs) operating on a single input tape. :)

+//

+ +// This rewriter makes no modifications to the token stream. It does not ask the +// stream to fill itself up nor does it advance the input cursor. The token +// stream {@link TokenStream#index()} will return the same value before and +// after any {@link #getText()} call.

+ +//

+// The rewriter only works on tokens that you have in the buffer and ignores the +// current input cursor. If you are buffering tokens on-demand, calling +// {@link #getText()} halfway through the input will only do rewrites for those +// tokens in the first half of the file.

+ +//

+// Since the operations are done lazily at {@link #getText}-time, operations do +// not screw up the token index values. That is, an insert operation at token +// index {@code i} does not change the index values for tokens +// {@code i}+1..n-1.

+ +//

+// Because operations never actually alter the buffer, you may always get the +// original token stream back without undoing anything. Since the instructions +// are queued up, you can easily simulate transactions and roll back any changes +// if there is an error just by removing instructions. For example,

+ +//
+// CharStream input = new ANTLRFileStream("input");
+// TLexer lex = new TLexer(input);
+// CommonTokenStream tokens = new CommonTokenStream(lex);
+// T parser = new T(tokens);
+// TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
+// parser.startRule();
+// 
+ +//

+// Then in the rules, you can execute (assuming rewriter is visible):

+ +//
+// Token t,u;
+// ...
+// rewriter.insertAfter(t, "text to put after t");}
+// rewriter.insertAfter(u, "text after u");}
+// System.out.println(rewriter.getText());
+// 
+ +//

+// You can also have multiple "instruction streams" and get multiple rewrites +// from a single pass over the input. Just name the instruction streams and use +// that name again when printing the buffer. This could be useful for generating +// a C file and also its header file--all from the same buffer:

+ +//
+// rewriter.insertAfter("pass1", t, "text to put after t");}
+// rewriter.insertAfter("pass2", u, "text after u");}
+// System.out.println(rewriter.getText("pass1"));
+// System.out.println(rewriter.getText("pass2"));
+// 
+ +//

+// If you don't use named rewrite streams, a "default" stream is used as the +// first example shows.

+ +const ( + Default_Program_Name = "default" + Program_Init_Size = 100 + Min_Token_Index = 0 +) + +// Define the rewrite operation hierarchy + +type RewriteOperation interface { + // Execute the rewrite operation by possibly adding to the buffer. + // Return the index of the next token to operate on. + Execute(buffer *bytes.Buffer) int + String() string + GetInstructionIndex() int + GetIndex() int + GetText() string + GetOpName() string + GetTokens() TokenStream + SetInstructionIndex(val int) + SetIndex(int) + SetText(string) + SetOpName(string) + SetTokens(TokenStream) +} + +type BaseRewriteOperation struct { + //Current index of rewrites list + instruction_index int + //Token buffer index + index int + //Substitution text + text string + //Actual operation name + op_name string + //Pointer to token steam + tokens TokenStream +} + +func (op *BaseRewriteOperation) GetInstructionIndex() int { + return op.instruction_index +} + +func (op *BaseRewriteOperation) GetIndex() int { + return op.index +} + +func (op *BaseRewriteOperation) GetText() string { + return op.text +} + +func (op *BaseRewriteOperation) GetOpName() string { + return op.op_name +} + +func (op *BaseRewriteOperation) GetTokens() TokenStream { + return op.tokens +} + +func (op *BaseRewriteOperation) SetInstructionIndex(val int) { + op.instruction_index = val +} + +func (op *BaseRewriteOperation) SetIndex(val int) { + op.index = val +} + +func (op *BaseRewriteOperation) SetText(val string) { + op.text = val +} + +func (op *BaseRewriteOperation) SetOpName(val string) { + op.op_name = val +} + +func (op *BaseRewriteOperation) SetTokens(val TokenStream) { + op.tokens = val +} + +func (op *BaseRewriteOperation) Execute(buffer *bytes.Buffer) int { + return op.index +} + +func (op *BaseRewriteOperation) String() string { + return fmt.Sprintf("<%s@%d:\"%s\">", + op.op_name, + op.tokens.Get(op.GetIndex()), + op.text, + ) + +} + +type InsertBeforeOp struct { + BaseRewriteOperation +} + +func NewInsertBeforeOp(index int, text string, stream TokenStream) *InsertBeforeOp { + return &InsertBeforeOp{BaseRewriteOperation: BaseRewriteOperation{ + index: index, + text: text, + op_name: "InsertBeforeOp", + tokens: stream, + }} +} + +func (op *InsertBeforeOp) Execute(buffer *bytes.Buffer) int { + buffer.WriteString(op.text) + if op.tokens.Get(op.index).GetTokenType() != TokenEOF { + buffer.WriteString(op.tokens.Get(op.index).GetText()) + } + return op.index + 1 +} + +func (op *InsertBeforeOp) String() string { + return op.BaseRewriteOperation.String() +} + +// Distinguish between insert after/before to do the "insert afters" +// first and then the "insert befores" at same index. Implementation +// of "insert after" is "insert before index+1". + +type InsertAfterOp struct { + BaseRewriteOperation +} + +func NewInsertAfterOp(index int, text string, stream TokenStream) *InsertAfterOp { + return &InsertAfterOp{BaseRewriteOperation: BaseRewriteOperation{ + index: index + 1, + text: text, + tokens: stream, + }} +} + +func (op *InsertAfterOp) Execute(buffer *bytes.Buffer) int { + buffer.WriteString(op.text) + if op.tokens.Get(op.index).GetTokenType() != TokenEOF { + buffer.WriteString(op.tokens.Get(op.index).GetText()) + } + return op.index + 1 +} + +func (op *InsertAfterOp) String() string { + return op.BaseRewriteOperation.String() +} + +// I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp +// instructions. +type ReplaceOp struct { + BaseRewriteOperation + LastIndex int +} + +func NewReplaceOp(from, to int, text string, stream TokenStream) *ReplaceOp { + return &ReplaceOp{ + BaseRewriteOperation: BaseRewriteOperation{ + index: from, + text: text, + op_name: "ReplaceOp", + tokens: stream, + }, + LastIndex: to, + } +} + +func (op *ReplaceOp) Execute(buffer *bytes.Buffer) int { + if op.text != "" { + buffer.WriteString(op.text) + } + return op.LastIndex + 1 +} + +func (op *ReplaceOp) String() string { + if op.text == "" { + return fmt.Sprintf("", + op.tokens.Get(op.index), op.tokens.Get(op.LastIndex)) + } + return fmt.Sprintf("", + op.tokens.Get(op.index), op.tokens.Get(op.LastIndex), op.text) +} + +type TokenStreamRewriter struct { + //Our source stream + tokens TokenStream + // You may have multiple, named streams of rewrite operations. + // I'm calling these things "programs." + // Maps String (name) → rewrite (List) + programs map[string][]RewriteOperation + last_rewrite_token_indexes map[string]int +} + +func NewTokenStreamRewriter(tokens TokenStream) *TokenStreamRewriter { + return &TokenStreamRewriter{ + tokens: tokens, + programs: map[string][]RewriteOperation{ + Default_Program_Name: make([]RewriteOperation, 0, Program_Init_Size), + }, + last_rewrite_token_indexes: map[string]int{}, + } +} + +func (tsr *TokenStreamRewriter) GetTokenStream() TokenStream { + return tsr.tokens +} + +// Rollback the instruction stream for a program so that +// the indicated instruction (via instructionIndex) is no +// longer in the stream. UNTESTED! +func (tsr *TokenStreamRewriter) Rollback(program_name string, instruction_index int) { + is, ok := tsr.programs[program_name] + if ok { + tsr.programs[program_name] = is[Min_Token_Index:instruction_index] + } +} + +func (tsr *TokenStreamRewriter) RollbackDefault(instruction_index int) { + tsr.Rollback(Default_Program_Name, instruction_index) +} + +// Reset the program so that no instructions exist +func (tsr *TokenStreamRewriter) DeleteProgram(program_name string) { + tsr.Rollback(program_name, Min_Token_Index) //TODO: double test on that cause lower bound is not included +} + +func (tsr *TokenStreamRewriter) DeleteProgramDefault() { + tsr.DeleteProgram(Default_Program_Name) +} + +func (tsr *TokenStreamRewriter) InsertAfter(program_name string, index int, text string) { + // to insert after, just insert before next index (even if past end) + var op RewriteOperation = NewInsertAfterOp(index, text, tsr.tokens) + rewrites := tsr.GetProgram(program_name) + op.SetInstructionIndex(len(rewrites)) + tsr.AddToProgram(program_name, op) +} + +func (tsr *TokenStreamRewriter) InsertAfterDefault(index int, text string) { + tsr.InsertAfter(Default_Program_Name, index, text) +} + +func (tsr *TokenStreamRewriter) InsertAfterToken(program_name string, token Token, text string) { + tsr.InsertAfter(program_name, token.GetTokenIndex(), text) +} + +func (tsr *TokenStreamRewriter) InsertBefore(program_name string, index int, text string) { + var op RewriteOperation = NewInsertBeforeOp(index, text, tsr.tokens) + rewrites := tsr.GetProgram(program_name) + op.SetInstructionIndex(len(rewrites)) + tsr.AddToProgram(program_name, op) +} + +func (tsr *TokenStreamRewriter) InsertBeforeDefault(index int, text string) { + tsr.InsertBefore(Default_Program_Name, index, text) +} + +func (tsr *TokenStreamRewriter) InsertBeforeToken(program_name string, token Token, text string) { + tsr.InsertBefore(program_name, token.GetTokenIndex(), text) +} + +func (tsr *TokenStreamRewriter) Replace(program_name string, from, to int, text string) { + if from > to || from < 0 || to < 0 || to >= tsr.tokens.Size() { + panic(fmt.Sprintf("replace: range invalid: %d..%d(size=%d)", + from, to, tsr.tokens.Size())) + } + var op RewriteOperation = NewReplaceOp(from, to, text, tsr.tokens) + rewrites := tsr.GetProgram(program_name) + op.SetInstructionIndex(len(rewrites)) + tsr.AddToProgram(program_name, op) +} + +func (tsr *TokenStreamRewriter) ReplaceDefault(from, to int, text string) { + tsr.Replace(Default_Program_Name, from, to, text) +} + +func (tsr *TokenStreamRewriter) ReplaceDefaultPos(index int, text string) { + tsr.ReplaceDefault(index, index, text) +} + +func (tsr *TokenStreamRewriter) ReplaceToken(program_name string, from, to Token, text string) { + tsr.Replace(program_name, from.GetTokenIndex(), to.GetTokenIndex(), text) +} + +func (tsr *TokenStreamRewriter) ReplaceTokenDefault(from, to Token, text string) { + tsr.ReplaceToken(Default_Program_Name, from, to, text) +} + +func (tsr *TokenStreamRewriter) ReplaceTokenDefaultPos(index Token, text string) { + tsr.ReplaceTokenDefault(index, index, text) +} + +func (tsr *TokenStreamRewriter) Delete(program_name string, from, to int) { + tsr.Replace(program_name, from, to, "") +} + +func (tsr *TokenStreamRewriter) DeleteDefault(from, to int) { + tsr.Delete(Default_Program_Name, from, to) +} + +func (tsr *TokenStreamRewriter) DeleteDefaultPos(index int) { + tsr.DeleteDefault(index, index) +} + +func (tsr *TokenStreamRewriter) DeleteToken(program_name string, from, to Token) { + tsr.ReplaceToken(program_name, from, to, "") +} + +func (tsr *TokenStreamRewriter) DeleteTokenDefault(from, to Token) { + tsr.DeleteToken(Default_Program_Name, from, to) +} + +func (tsr *TokenStreamRewriter) GetLastRewriteTokenIndex(program_name string) int { + i, ok := tsr.last_rewrite_token_indexes[program_name] + if !ok { + return -1 + } + return i +} + +func (tsr *TokenStreamRewriter) GetLastRewriteTokenIndexDefault() int { + return tsr.GetLastRewriteTokenIndex(Default_Program_Name) +} + +func (tsr *TokenStreamRewriter) SetLastRewriteTokenIndex(program_name string, i int) { + tsr.last_rewrite_token_indexes[program_name] = i +} + +func (tsr *TokenStreamRewriter) InitializeProgram(name string) []RewriteOperation { + is := make([]RewriteOperation, 0, Program_Init_Size) + tsr.programs[name] = is + return is +} + +func (tsr *TokenStreamRewriter) AddToProgram(name string, op RewriteOperation) { + is := tsr.GetProgram(name) + is = append(is, op) + tsr.programs[name] = is +} + +func (tsr *TokenStreamRewriter) GetProgram(name string) []RewriteOperation { + is, ok := tsr.programs[name] + if !ok { + is = tsr.InitializeProgram(name) + } + return is +} + +// Return the text from the original tokens altered per the +// instructions given to this rewriter. +func (tsr *TokenStreamRewriter) GetTextDefault() string { + return tsr.GetText( + Default_Program_Name, + NewInterval(0, tsr.tokens.Size()-1)) +} + +// Return the text from the original tokens altered per the +// instructions given to this rewriter. +func (tsr *TokenStreamRewriter) GetText(program_name string, interval *Interval) string { + rewrites := tsr.programs[program_name] + start := interval.Start + stop := interval.Stop + // ensure start/end are in range + stop = min(stop, tsr.tokens.Size()-1) + start = max(start, 0) + if rewrites == nil || len(rewrites) == 0 { + return tsr.tokens.GetTextFromInterval(interval) // no instructions to execute + } + buf := bytes.Buffer{} + // First, optimize instruction stream + indexToOp := reduceToSingleOperationPerIndex(rewrites) + // Walk buffer, executing instructions and emitting tokens + for i := start; i <= stop && i < tsr.tokens.Size(); { + op := indexToOp[i] + delete(indexToOp, i) // remove so any left have index size-1 + t := tsr.tokens.Get(i) + if op == nil { + // no operation at that index, just dump token + if t.GetTokenType() != TokenEOF { + buf.WriteString(t.GetText()) + } + i++ // move to next token + } else { + i = op.Execute(&buf) // execute operation and skip + } + } + // include stuff after end if it's last index in buffer + // So, if they did an insertAfter(lastValidIndex, "foo"), include + // foo if end==lastValidIndex. + if stop == tsr.tokens.Size()-1 { + // Scan any remaining operations after last token + // should be included (they will be inserts). + for _, op := range indexToOp { + if op.GetIndex() >= tsr.tokens.Size()-1 { + buf.WriteString(op.GetText()) + } + } + } + return buf.String() +} + +// We need to combine operations and report invalid operations (like +// overlapping replaces that are not completed nested). Inserts to +// same index need to be combined etc... Here are the cases: +// +// I.i.u I.j.v leave alone, nonoverlapping +// I.i.u I.i.v combine: Iivu +// +// R.i-j.u R.x-y.v | i-j in x-y delete first R +// R.i-j.u R.i-j.v delete first R +// R.i-j.u R.x-y.v | x-y in i-j ERROR +// R.i-j.u R.x-y.v | boundaries overlap ERROR +// +// Delete special case of replace (text==null): +// D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) +// +// I.i.u R.x-y.v | i in (x+1)-y delete I (since insert before +// we're not deleting i) +// I.i.u R.x-y.v | i not in (x+1)-y leave alone, nonoverlapping +// R.x-y.v I.i.u | i in x-y ERROR +// R.x-y.v I.x.u R.x-y.uv (combine, delete I) +// R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping +// +// I.i.u = insert u before op @ index i +// R.x-y.u = replace x-y indexed tokens with u +// +// First we need to examine replaces. For any replace op: +// +// 1. wipe out any insertions before op within that range. +// 2. Drop any replace op before that is contained completely within +// that range. +// 3. Throw exception upon boundary overlap with any previous replace. +// +// Then we can deal with inserts: +// +// 1. for any inserts to same index, combine even if not adjacent. +// 2. for any prior replace with same left boundary, combine this +// insert with replace and delete this replace. +// 3. throw exception if index in same range as previous replace +// +// Don't actually delete; make op null in list. Easier to walk list. +// Later we can throw as we add to index → op map. +// +// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the +// inserted stuff would be before the replace range. But, if you +// add tokens in front of a method body '{' and then delete the method +// body, I think the stuff before the '{' you added should disappear too. +// +// Return a map from token index to operation. +func reduceToSingleOperationPerIndex(rewrites []RewriteOperation) map[int]RewriteOperation { + // WALK REPLACES + for i := 0; i < len(rewrites); i++ { + op := rewrites[i] + if op == nil { + continue + } + rop, ok := op.(*ReplaceOp) + if !ok { + continue + } + // Wipe prior inserts within range + for j := 0; j < i && j < len(rewrites); j++ { + if iop, ok := rewrites[j].(*InsertBeforeOp); ok { + if iop.index == rop.index { + // E.g., insert before 2, delete 2..2; update replace + // text to include insert before, kill insert + rewrites[iop.instruction_index] = nil + if rop.text != "" { + rop.text = iop.text + rop.text + } else { + rop.text = iop.text + } + } else if iop.index > rop.index && iop.index <= rop.LastIndex { + // delete insert as it's a no-op. + rewrites[iop.instruction_index] = nil + } + } + } + // Drop any prior replaces contained within + for j := 0; j < i && j < len(rewrites); j++ { + if prevop, ok := rewrites[j].(*ReplaceOp); ok { + if prevop.index >= rop.index && prevop.LastIndex <= rop.LastIndex { + // delete replace as it's a no-op. + rewrites[prevop.instruction_index] = nil + continue + } + // throw exception unless disjoint or identical + disjoint := prevop.LastIndex < rop.index || prevop.index > rop.LastIndex + // Delete special case of replace (text==null): + // D.i-j.u D.x-y.v | boundaries overlap combine to max(min)..max(right) + if prevop.text == "" && rop.text == "" && !disjoint { + rewrites[prevop.instruction_index] = nil + rop.index = min(prevop.index, rop.index) + rop.LastIndex = max(prevop.LastIndex, rop.LastIndex) + println("new rop" + rop.String()) //TODO: remove console write, taken from Java version + } else if !disjoint { + panic("replace op boundaries of " + rop.String() + " overlap with previous " + prevop.String()) + } + } + } + } + // WALK INSERTS + for i := 0; i < len(rewrites); i++ { + op := rewrites[i] + if op == nil { + continue + } + //hack to replicate inheritance in composition + _, iok := rewrites[i].(*InsertBeforeOp) + _, aok := rewrites[i].(*InsertAfterOp) + if !iok && !aok { + continue + } + iop := rewrites[i] + // combine current insert with prior if any at same index + // deviating a bit from TokenStreamRewriter.java - hard to incorporate inheritance logic + for j := 0; j < i && j < len(rewrites); j++ { + if nextIop, ok := rewrites[j].(*InsertAfterOp); ok { + if nextIop.index == iop.GetIndex() { + iop.SetText(nextIop.text + iop.GetText()) + rewrites[j] = nil + } + } + if prevIop, ok := rewrites[j].(*InsertBeforeOp); ok { + if prevIop.index == iop.GetIndex() { + iop.SetText(iop.GetText() + prevIop.text) + rewrites[prevIop.instruction_index] = nil + } + } + } + // look for replaces where iop.index is in range; error + for j := 0; j < i && j < len(rewrites); j++ { + if rop, ok := rewrites[j].(*ReplaceOp); ok { + if iop.GetIndex() == rop.index { + rop.text = iop.GetText() + rop.text + rewrites[i] = nil + continue + } + if iop.GetIndex() >= rop.index && iop.GetIndex() <= rop.LastIndex { + panic("insert op " + iop.String() + " within boundaries of previous " + rop.String()) + } + } + } + } + m := map[int]RewriteOperation{} + for i := 0; i < len(rewrites); i++ { + op := rewrites[i] + if op == nil { + continue + } + if _, ok := m[op.GetIndex()]; ok { + panic("should only be one op per index") + } + m[op.GetIndex()] = op + } + return m +} + +/* + Quick fixing Go lack of overloads +*/ + +func max(a, b int) int { + if a > b { + return a + } else { + return b + } +} +func min(a, b int) int { + if a < b { + return a + } else { + return b + } +} diff --git a/runtime/Go/antlr/v4/tokenstream_rewriter_test.go b/runtime/Go/antlr/v4/tokenstream_rewriter_test.go new file mode 100644 index 0000000000..a00265128a --- /dev/null +++ b/runtime/Go/antlr/v4/tokenstream_rewriter_test.go @@ -0,0 +1,417 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. +package antlr + +import ( + "fmt" + "strings" + "sync" + "testing" + "unicode" +) + +/* Assume the following grammar for this test. + +lexer grammar LexerA; +A : 'a'; +B : 'b'; +C : 'c'; + +*/ + +func TestInsertBeforeIndex0(t *testing.T) { + input := NewInputStream("abc") + lexer := NewLexerA(input) + stream := NewCommonTokenStream(lexer, 0) + stream.Fill() + tokens := NewTokenStreamRewriter(stream) + tokens.InsertBeforeDefault(0, "0") + result := tokens.GetTextDefault() + if result != "0abc" { + t.Errorf("test failed, got %s", result) + } +} + +func prepare_rewriter(str string) *TokenStreamRewriter { + input := NewInputStream(str) + lexer := NewLexerA(input) + stream := NewCommonTokenStream(lexer, 0) + stream.Fill() + return NewTokenStreamRewriter(stream) +} + +type LexerTest struct { + input string + expected string + description string + expected_exception []string + ops func(*TokenStreamRewriter) +} + +func NewLexerTest(input, expected, desc string, ops func(*TokenStreamRewriter)) LexerTest { + return LexerTest{input: input, expected: expected, description: desc, ops: ops} +} + +func NewLexerExceptionTest(input string, expected_err []string, desc string, ops func(*TokenStreamRewriter)) LexerTest { + return LexerTest{input: input, expected_exception: expected_err, description: desc, ops: ops} +} + +func panic_tester(t *testing.T, expected_msg []string, r *TokenStreamRewriter) { + defer func() { + r := recover() + if r == nil { + t.Errorf("Panic is expected, but finished normally") + } else { + s_e := r.(string) + for _, e := range expected_msg { + if !strings.Contains(s_e, e) { + t.Errorf("Element [%s] is not in error message: [%s]", e, s_e) + } + } + } + }() + r.GetTextDefault() +} + +func TestLexerA(t *testing.T) { + tests := []LexerTest{ + NewLexerTest("abc", "0abc", "InsertBeforeIndex0", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "0") + }), + NewLexerTest("abc", "abcx", "InsertAfterLastIndex", + func(r *TokenStreamRewriter) { + r.InsertAfterDefault(2, "x") + }), + NewLexerTest("abc", "axbxc", "2InsertBeforeAfterMiddleIndex", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(1, "x") + r.InsertAfterDefault(1, "x") + }), + NewLexerTest("abc", "xbc", "ReplaceIndex0", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(0, "x") + }), + NewLexerTest("abc", "abx", "ReplaceLastIndex", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(2, "x") + }), + NewLexerTest("abc", "axc", "ReplaceMiddleIndex", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(1, "x") + }), + NewLexerTest("abc", "ayc", "2ReplaceMiddleIndex", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(1, "x") + r.ReplaceDefaultPos(1, "y") + }), + NewLexerTest("abc", "_ayc", "2ReplaceMiddleIndex1InsertBefore", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "_") + r.ReplaceDefaultPos(1, "x") + r.ReplaceDefaultPos(1, "y") + }), + NewLexerTest("abc", "ac", "ReplaceThenDeleteMiddleIndex", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(1, "x") + r.DeleteDefaultPos(1) + }), + NewLexerExceptionTest("abc", []string{"insert op", "within boundaries of previous"}, + "InsertInPriorReplace", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(0, 2, "x") + r.InsertBeforeDefault(1, "0") + }), + NewLexerTest("abc", "0xbc", "InsertThenReplaceSameIndex", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "0") + r.ReplaceDefaultPos(0, "x") + }), + NewLexerTest("abc", "ayxbc", "2InsertMiddleIndex", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(1, "x") + r.InsertBeforeDefault(1, "y") + }), + NewLexerTest("abc", "yxzbc", "2InsertThenReplaceIndex0", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "x") + r.InsertBeforeDefault(0, "y") + r.ReplaceDefaultPos(0, "z") + }), + NewLexerTest("abc", "abyx", "ReplaceThenInsertBeforeLastIndex", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(2, "x") + r.InsertBeforeDefault(2, "y") + }), + NewLexerTest("abc", "abyx", "InsertThenReplaceLastIndex", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(2, "y") + r.ReplaceDefaultPos(2, "x") + }), + NewLexerTest("abc", "abxy", "ReplaceThenInsertAfterLastIndex", + func(r *TokenStreamRewriter) { + r.ReplaceDefaultPos(2, "x") + r.InsertAfterDefault(2, "y") + }), + NewLexerTest("abcccba", "abyxba", "ReplaceThenInsertAtLeftEdge", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "x") + r.InsertBeforeDefault(2, "y") + }), + NewLexerTest("abcccba", "abyxba", "ReplaceThenInsertAtLeftEdge", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "x") + r.InsertBeforeDefault(2, "y") + }), + NewLexerExceptionTest("abcccba", + []string{"insert op", "InsertBeforeOp", "within boundaries of previous", "ReplaceOp"}, + "ReplaceRangeThenInsertAtRightEdge", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "x") + r.InsertBeforeDefault(4, "y") + }), + NewLexerTest("abcccba", "abxyba", "ReplaceRangeThenInsertAfterRightEdge", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "x") + r.InsertAfterDefault(4, "y") + }), + NewLexerTest("abcccba", "x", "ReplaceAll", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(0, 6, "x") + }), + NewLexerTest("abcccba", "abxyzba", "ReplaceSubsetThenFetch", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "xyz") + }), + NewLexerExceptionTest("abcccba", + []string{"replace op boundaries of", "ReplaceOp", "overlap with previous"}, + "ReplaceThenReplaceSuperset", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "xyz") + r.ReplaceDefault(3, 5, "foo") + }), + NewLexerExceptionTest("abcccba", + []string{"replace op boundaries of", "ReplaceOp", "overlap with previous"}, + "ReplaceThenReplaceLowerIndexedSuperset", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 4, "xyz") + r.ReplaceDefault(1, 3, "foo") + }), + NewLexerTest("abcba", "fooa", "ReplaceSingleMiddleThenOverlappingSuperset", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 2, "xyz") + r.ReplaceDefault(0, 3, "foo") + }), + NewLexerTest("abc", "yxabc", "CombineInserts", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "x") + r.InsertBeforeDefault(0, "y") + }), + NewLexerTest("abc", "yazxbc", "Combine3Inserts", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(1, "x") + r.InsertBeforeDefault(0, "y") + r.InsertBeforeDefault(1, "z") + }), + NewLexerTest("abc", "zfoo", "CombineInsertOnLeftWithReplace", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(0, 2, "foo") + r.InsertBeforeDefault(0, "z") + }), + NewLexerTest("abc", "z", "CombineInsertOnLeftWithDelete", + func(r *TokenStreamRewriter) { + r.DeleteDefault(0, 2) + r.InsertBeforeDefault(0, "z") + }), + NewLexerTest("abc", "zaxbyc", "DisjointInserts", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(1, "x") + r.InsertBeforeDefault(2, "y") + r.InsertBeforeDefault(0, "z") + }), + NewLexerTest("abcc", "bar", "OverlappingReplace", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(1, 2, "foo") + r.ReplaceDefault(0, 3, "bar") + }), + NewLexerExceptionTest("abcc", + []string{"replace op boundaries of", "ReplaceOp", "overlap with previous"}, + "OverlappingReplace2", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(0, 3, "bar") + r.ReplaceDefault(1, 2, "foo") + }), + NewLexerTest("abcc", "barc", "OverlappingReplace3", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(1, 2, "foo") + r.ReplaceDefault(0, 2, "bar") + }), + NewLexerTest("abcc", "abar", "OverlappingReplace4", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(1, 2, "foo") + r.ReplaceDefault(1, 3, "bar") + }), + NewLexerTest("abcc", "afooc", "DropIdenticalReplace", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(1, 2, "foo") + r.ReplaceDefault(1, 2, "foo") + }), + NewLexerTest("abc", "afoofoo", "DropPrevCoveredInsert", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(1, "foo") + r.ReplaceDefault(1, 2, "foo") + }), + NewLexerTest("abcc", "axbfoo", "LeaveAloneDisjointInsert", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(1, "x") + r.ReplaceDefault(2, 3, "foo") + }), + NewLexerTest("abcc", "axbfoo", "LeaveAloneDisjointInsert2", + func(r *TokenStreamRewriter) { + r.ReplaceDefault(2, 3, "foo") + r.InsertBeforeDefault(1, "x") + }), + NewLexerTest("abc", "aby", "InsertBeforeTokenThenDeleteThatToken", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(2, "y") + r.DeleteDefaultPos(2) + }), + NewLexerTest("aa", "aa", "DistinguishBetweenInsertAfterAndInsertBeforeToPreserverOrder", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "") + r.InsertAfterDefault(0, "") + r.InsertBeforeDefault(1, "") + r.InsertAfterDefault(1, "") + }), + NewLexerTest("aa", "

a

a", "DistinguishBetweenInsertAfterAndInsertBeforeToPreserverOrder2", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "

") + r.InsertBeforeDefault(0, "") + r.InsertAfterDefault(0, "

") + r.InsertAfterDefault(0, "") + r.InsertBeforeDefault(1, "") + r.InsertAfterDefault(1, "") + }), + NewLexerTest("ab", "

a

!b", "DistinguishBetweenInsertAfterAndInsertBeforeToPreserverOrder2", + func(r *TokenStreamRewriter) { + r.InsertBeforeDefault(0, "

") + r.InsertBeforeDefault(0, "") + r.InsertBeforeDefault(0, "

") + r.InsertAfterDefault(0, "

") + r.InsertAfterDefault(0, "
") + r.InsertAfterDefault(0, "
") + r.InsertBeforeDefault(1, "!") + }), + } + + for _, c := range tests { + t.Run(c.description, func(t *testing.T) { + rewriter := prepare_rewriter(c.input) + c.ops(rewriter) + if len(c.expected_exception) > 0 { + panic_tester(t, c.expected_exception, rewriter) + } else { + result := rewriter.GetTextDefault() + if result != c.expected { + t.Errorf("Expected:%s | Result: %s", c.expected, result) + } + } + }) + } +} + +// Suppress unused import error +var _ = fmt.Printf +var _ = sync.Once{} +var _ = unicode.IsLetter + +type LexerA struct { + *BaseLexer + channelNames []string + modeNames []string + // TODO: EOF string +} + +var lexeraLexerStaticData struct { + once sync.Once + serializedATN []int32 + channelNames []string + modeNames []string + literalNames []string + symbolicNames []string + ruleNames []string + predictionContextCache *PredictionContextCache + atn *ATN + decisionToDFA []*DFA +} + +func lexeraLexerInit() { + staticData := &lexeraLexerStaticData + staticData.channelNames = []string{ + "DEFAULT_TOKEN_CHANNEL", "HIDDEN", + } + staticData.modeNames = []string{ + "DEFAULT_MODE", + } + staticData.literalNames = []string{ + "", "'a'", "'b'", "'c'", + } + staticData.symbolicNames = []string{ + "", "A", "B", "C", + } + staticData.ruleNames = []string{ + "A", "B", "C", + } + staticData.predictionContextCache = NewPredictionContextCache() + staticData.serializedATN = []int32{ + 4, 0, 3, 13, 6, -1, 2, 0, 7, 0, 2, 1, 7, 1, 2, 2, 7, 2, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 2, 1, 2, 0, 0, 3, 1, 1, 3, 2, 5, 3, 1, 0, 0, 12, 0, 1, 1, 0, + 0, 0, 0, 3, 1, 0, 0, 0, 0, 5, 1, 0, 0, 0, 1, 7, 1, 0, 0, 0, 3, 9, 1, 0, + 0, 0, 5, 11, 1, 0, 0, 0, 7, 8, 5, 97, 0, 0, 8, 2, 1, 0, 0, 0, 9, 10, 5, + 98, 0, 0, 10, 4, 1, 0, 0, 0, 11, 12, 5, 99, 0, 0, 12, 6, 1, 0, 0, 0, 1, + 0, 0, + } + deserializer := NewATNDeserializer(nil) + staticData.atn = deserializer.Deserialize(staticData.serializedATN) + atn := staticData.atn + staticData.decisionToDFA = make([]*DFA, len(atn.DecisionToState)) + decisionToDFA := staticData.decisionToDFA + for index, state := range atn.DecisionToState { + decisionToDFA[index] = NewDFA(state, index) + } +} + +// LexerAInit initializes any static state used to implement LexerA. By default the +// static state used to implement the lexer is lazily initialized during the first call to +// NewLexerA(). You can call this function if you wish to initialize the static state ahead +// of time. +func LexerAInit() { + staticData := &lexeraLexerStaticData + staticData.once.Do(lexeraLexerInit) +} + +// NewLexerA produces a new lexer instance for the optional input antlr.CharStream. +func NewLexerA(input CharStream) *LexerA { + LexerAInit() + l := new(LexerA) + l.BaseLexer = NewBaseLexer(input) + staticData := &lexeraLexerStaticData + l.Interpreter = NewLexerATNSimulator(l, staticData.atn, staticData.decisionToDFA, staticData.predictionContextCache) + l.channelNames = staticData.channelNames + l.modeNames = staticData.modeNames + l.RuleNames = staticData.ruleNames + l.LiteralNames = staticData.literalNames + l.SymbolicNames = staticData.symbolicNames + l.GrammarFileName = "LexerA.g4" + // TODO: l.EOF = antlr.TokenEOF + + return l +} + +// LexerA tokens. +const ( + LexerAA = 1 + LexerAB = 2 + LexerAC = 3 +) diff --git a/runtime/Go/antlr/v4/trace_listener.go b/runtime/Go/antlr/v4/trace_listener.go new file mode 100644 index 0000000000..7b663bf849 --- /dev/null +++ b/runtime/Go/antlr/v4/trace_listener.go @@ -0,0 +1,32 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "fmt" + +type TraceListener struct { + parser *BaseParser +} + +func NewTraceListener(parser *BaseParser) *TraceListener { + tl := new(TraceListener) + tl.parser = parser + return tl +} + +func (t *TraceListener) VisitErrorNode(_ ErrorNode) { +} + +func (t *TraceListener) EnterEveryRule(ctx ParserRuleContext) { + fmt.Println("enter " + t.parser.GetRuleNames()[ctx.GetRuleIndex()] + ", LT(1)=" + t.parser.input.LT(1).GetText()) +} + +func (t *TraceListener) VisitTerminal(node TerminalNode) { + fmt.Println("consume " + fmt.Sprint(node.GetSymbol()) + " rule " + t.parser.GetRuleNames()[t.parser.ctx.GetRuleIndex()]) +} + +func (t *TraceListener) ExitEveryRule(ctx ParserRuleContext) { + fmt.Println("exit " + t.parser.GetRuleNames()[ctx.GetRuleIndex()] + ", LT(1)=" + t.parser.input.LT(1).GetText()) +} diff --git a/runtime/Go/antlr/v4/transition.go b/runtime/Go/antlr/v4/transition.go new file mode 100644 index 0000000000..36be4f7331 --- /dev/null +++ b/runtime/Go/antlr/v4/transition.go @@ -0,0 +1,428 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "fmt" + "strconv" + "strings" +) + +// atom, set, epsilon, action, predicate, rule transitions. +// +//

This is a one way link. It emanates from a state (usually via a list of +// transitions) and has a target state.

+// +//

Since we never have to change the ATN transitions once we construct it, +// the states. We'll use the term Edge for the DFA to distinguish them from +// ATN transitions.

+ +type Transition interface { + getTarget() ATNState + setTarget(ATNState) + getIsEpsilon() bool + getLabel() *IntervalSet + getSerializationType() int + Matches(int, int, int) bool +} + +type BaseTransition struct { + target ATNState + isEpsilon bool + label int + intervalSet *IntervalSet + serializationType int +} + +func NewBaseTransition(target ATNState) *BaseTransition { + + if target == nil { + panic("target cannot be nil.") + } + + t := new(BaseTransition) + + t.target = target + // Are we epsilon, action, sempred? + t.isEpsilon = false + t.intervalSet = nil + + return t +} + +func (t *BaseTransition) getTarget() ATNState { + return t.target +} + +func (t *BaseTransition) setTarget(s ATNState) { + t.target = s +} + +func (t *BaseTransition) getIsEpsilon() bool { + return t.isEpsilon +} + +func (t *BaseTransition) getLabel() *IntervalSet { + return t.intervalSet +} + +func (t *BaseTransition) getSerializationType() int { + return t.serializationType +} + +func (t *BaseTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + panic("Not implemented") +} + +const ( + TransitionEPSILON = 1 + TransitionRANGE = 2 + TransitionRULE = 3 + TransitionPREDICATE = 4 // e.g., {isType(input.LT(1))}? + TransitionATOM = 5 + TransitionACTION = 6 + TransitionSET = 7 // ~(A|B) or ~atom, wildcard, which convert to next 2 + TransitionNOTSET = 8 + TransitionWILDCARD = 9 + TransitionPRECEDENCE = 10 +) + +var TransitionserializationNames = []string{ + "INVALID", + "EPSILON", + "RANGE", + "RULE", + "PREDICATE", + "ATOM", + "ACTION", + "SET", + "NOT_SET", + "WILDCARD", + "PRECEDENCE", +} + +//var TransitionserializationTypes struct { +// EpsilonTransition int +// RangeTransition int +// RuleTransition int +// PredicateTransition int +// AtomTransition int +// ActionTransition int +// SetTransition int +// NotSetTransition int +// WildcardTransition int +// PrecedencePredicateTransition int +//}{ +// TransitionEPSILON, +// TransitionRANGE, +// TransitionRULE, +// TransitionPREDICATE, +// TransitionATOM, +// TransitionACTION, +// TransitionSET, +// TransitionNOTSET, +// TransitionWILDCARD, +// TransitionPRECEDENCE +//} + +// TODO: make all transitions sets? no, should remove set edges +type AtomTransition struct { + *BaseTransition +} + +func NewAtomTransition(target ATNState, intervalSet int) *AtomTransition { + + t := new(AtomTransition) + t.BaseTransition = NewBaseTransition(target) + + t.label = intervalSet // The token type or character value or, signifies special intervalSet. + t.intervalSet = t.makeLabel() + t.serializationType = TransitionATOM + + return t +} + +func (t *AtomTransition) makeLabel() *IntervalSet { + s := NewIntervalSet() + s.addOne(t.label) + return s +} + +func (t *AtomTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return t.label == symbol +} + +func (t *AtomTransition) String() string { + return strconv.Itoa(t.label) +} + +type RuleTransition struct { + *BaseTransition + + followState ATNState + ruleIndex, precedence int +} + +func NewRuleTransition(ruleStart ATNState, ruleIndex, precedence int, followState ATNState) *RuleTransition { + + t := new(RuleTransition) + t.BaseTransition = NewBaseTransition(ruleStart) + + t.ruleIndex = ruleIndex + t.precedence = precedence + t.followState = followState + t.serializationType = TransitionRULE + t.isEpsilon = true + + return t +} + +func (t *RuleTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return false +} + +type EpsilonTransition struct { + *BaseTransition + + outermostPrecedenceReturn int +} + +func NewEpsilonTransition(target ATNState, outermostPrecedenceReturn int) *EpsilonTransition { + + t := new(EpsilonTransition) + t.BaseTransition = NewBaseTransition(target) + + t.serializationType = TransitionEPSILON + t.isEpsilon = true + t.outermostPrecedenceReturn = outermostPrecedenceReturn + return t +} + +func (t *EpsilonTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return false +} + +func (t *EpsilonTransition) String() string { + return "epsilon" +} + +type RangeTransition struct { + *BaseTransition + + start, stop int +} + +func NewRangeTransition(target ATNState, start, stop int) *RangeTransition { + + t := new(RangeTransition) + t.BaseTransition = NewBaseTransition(target) + + t.serializationType = TransitionRANGE + t.start = start + t.stop = stop + t.intervalSet = t.makeLabel() + return t +} + +func (t *RangeTransition) makeLabel() *IntervalSet { + s := NewIntervalSet() + s.addRange(t.start, t.stop) + return s +} + +func (t *RangeTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return symbol >= t.start && symbol <= t.stop +} + +func (t *RangeTransition) String() string { + var sb strings.Builder + sb.WriteByte('\'') + sb.WriteRune(rune(t.start)) + sb.WriteString("'..'") + sb.WriteRune(rune(t.stop)) + sb.WriteByte('\'') + return sb.String() +} + +type AbstractPredicateTransition interface { + Transition + IAbstractPredicateTransitionFoo() +} + +type BaseAbstractPredicateTransition struct { + *BaseTransition +} + +func NewBasePredicateTransition(target ATNState) *BaseAbstractPredicateTransition { + + t := new(BaseAbstractPredicateTransition) + t.BaseTransition = NewBaseTransition(target) + + return t +} + +func (a *BaseAbstractPredicateTransition) IAbstractPredicateTransitionFoo() {} + +type PredicateTransition struct { + *BaseAbstractPredicateTransition + + isCtxDependent bool + ruleIndex, predIndex int +} + +func NewPredicateTransition(target ATNState, ruleIndex, predIndex int, isCtxDependent bool) *PredicateTransition { + + t := new(PredicateTransition) + t.BaseAbstractPredicateTransition = NewBasePredicateTransition(target) + + t.serializationType = TransitionPREDICATE + t.ruleIndex = ruleIndex + t.predIndex = predIndex + t.isCtxDependent = isCtxDependent // e.g., $i ref in pred + t.isEpsilon = true + return t +} + +func (t *PredicateTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return false +} + +func (t *PredicateTransition) getPredicate() *Predicate { + return NewPredicate(t.ruleIndex, t.predIndex, t.isCtxDependent) +} + +func (t *PredicateTransition) String() string { + return "pred_" + strconv.Itoa(t.ruleIndex) + ":" + strconv.Itoa(t.predIndex) +} + +type ActionTransition struct { + *BaseTransition + + isCtxDependent bool + ruleIndex, actionIndex, predIndex int +} + +func NewActionTransition(target ATNState, ruleIndex, actionIndex int, isCtxDependent bool) *ActionTransition { + + t := new(ActionTransition) + t.BaseTransition = NewBaseTransition(target) + + t.serializationType = TransitionACTION + t.ruleIndex = ruleIndex + t.actionIndex = actionIndex + t.isCtxDependent = isCtxDependent // e.g., $i ref in pred + t.isEpsilon = true + return t +} + +func (t *ActionTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return false +} + +func (t *ActionTransition) String() string { + return "action_" + strconv.Itoa(t.ruleIndex) + ":" + strconv.Itoa(t.actionIndex) +} + +type SetTransition struct { + *BaseTransition +} + +func NewSetTransition(target ATNState, set *IntervalSet) *SetTransition { + + t := new(SetTransition) + t.BaseTransition = NewBaseTransition(target) + + t.serializationType = TransitionSET + if set != nil { + t.intervalSet = set + } else { + t.intervalSet = NewIntervalSet() + t.intervalSet.addOne(TokenInvalidType) + } + + return t +} + +func (t *SetTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return t.intervalSet.contains(symbol) +} + +func (t *SetTransition) String() string { + return t.intervalSet.String() +} + +type NotSetTransition struct { + *SetTransition +} + +func NewNotSetTransition(target ATNState, set *IntervalSet) *NotSetTransition { + + t := new(NotSetTransition) + + t.SetTransition = NewSetTransition(target, set) + + t.serializationType = TransitionNOTSET + + return t +} + +func (t *NotSetTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol && !t.intervalSet.contains(symbol) +} + +func (t *NotSetTransition) String() string { + return "~" + t.intervalSet.String() +} + +type WildcardTransition struct { + *BaseTransition +} + +func NewWildcardTransition(target ATNState) *WildcardTransition { + + t := new(WildcardTransition) + t.BaseTransition = NewBaseTransition(target) + + t.serializationType = TransitionWILDCARD + return t +} + +func (t *WildcardTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return symbol >= minVocabSymbol && symbol <= maxVocabSymbol +} + +func (t *WildcardTransition) String() string { + return "." +} + +type PrecedencePredicateTransition struct { + *BaseAbstractPredicateTransition + + precedence int +} + +func NewPrecedencePredicateTransition(target ATNState, precedence int) *PrecedencePredicateTransition { + + t := new(PrecedencePredicateTransition) + t.BaseAbstractPredicateTransition = NewBasePredicateTransition(target) + + t.serializationType = TransitionPRECEDENCE + t.precedence = precedence + t.isEpsilon = true + + return t +} + +func (t *PrecedencePredicateTransition) Matches(symbol, minVocabSymbol, maxVocabSymbol int) bool { + return false +} + +func (t *PrecedencePredicateTransition) getPredicate() *PrecedencePredicate { + return NewPrecedencePredicate(t.precedence) +} + +func (t *PrecedencePredicateTransition) String() string { + return fmt.Sprint(t.precedence) + " >= _p" +} diff --git a/runtime/Go/antlr/v4/tree.go b/runtime/Go/antlr/v4/tree.go new file mode 100644 index 0000000000..85b4f137b5 --- /dev/null +++ b/runtime/Go/antlr/v4/tree.go @@ -0,0 +1,253 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +// The basic notion of a tree has a parent, a payload, and a list of children. +// It is the most abstract interface for all the trees used by ANTLR. +/// + +var TreeInvalidInterval = NewInterval(-1, -2) + +type Tree interface { + GetParent() Tree + SetParent(Tree) + GetPayload() interface{} + GetChild(i int) Tree + GetChildCount() int + GetChildren() []Tree +} + +type SyntaxTree interface { + Tree + + GetSourceInterval() *Interval +} + +type ParseTree interface { + SyntaxTree + + Accept(Visitor ParseTreeVisitor) interface{} + GetText() string + + ToStringTree([]string, Recognizer) string +} + +type RuleNode interface { + ParseTree + + GetRuleContext() RuleContext + GetBaseRuleContext() *BaseRuleContext +} + +type TerminalNode interface { + ParseTree + + GetSymbol() Token +} + +type ErrorNode interface { + TerminalNode + + errorNode() +} + +type ParseTreeVisitor interface { + Visit(tree ParseTree) interface{} + VisitChildren(node RuleNode) interface{} + VisitTerminal(node TerminalNode) interface{} + VisitErrorNode(node ErrorNode) interface{} +} + +type BaseParseTreeVisitor struct{} + +var _ ParseTreeVisitor = &BaseParseTreeVisitor{} + +func (v *BaseParseTreeVisitor) Visit(tree ParseTree) interface{} { return tree.Accept(v) } +func (v *BaseParseTreeVisitor) VisitChildren(node RuleNode) interface{} { return nil } +func (v *BaseParseTreeVisitor) VisitTerminal(node TerminalNode) interface{} { return nil } +func (v *BaseParseTreeVisitor) VisitErrorNode(node ErrorNode) interface{} { return nil } + +// TODO +//func (this ParseTreeVisitor) Visit(ctx) { +// if (Utils.isArray(ctx)) { +// self := this +// return ctx.map(function(child) { return VisitAtom(self, child)}) +// } else { +// return VisitAtom(this, ctx) +// } +//} +// +//func VisitAtom(Visitor, ctx) { +// if (ctx.parser == nil) { //is terminal +// return +// } +// +// name := ctx.parser.ruleNames[ctx.ruleIndex] +// funcName := "Visit" + Utils.titleCase(name) +// +// return Visitor[funcName](ctx) +//} + +type ParseTreeListener interface { + VisitTerminal(node TerminalNode) + VisitErrorNode(node ErrorNode) + EnterEveryRule(ctx ParserRuleContext) + ExitEveryRule(ctx ParserRuleContext) +} + +type BaseParseTreeListener struct{} + +var _ ParseTreeListener = &BaseParseTreeListener{} + +func (l *BaseParseTreeListener) VisitTerminal(node TerminalNode) {} +func (l *BaseParseTreeListener) VisitErrorNode(node ErrorNode) {} +func (l *BaseParseTreeListener) EnterEveryRule(ctx ParserRuleContext) {} +func (l *BaseParseTreeListener) ExitEveryRule(ctx ParserRuleContext) {} + +type TerminalNodeImpl struct { + parentCtx RuleContext + + symbol Token +} + +var _ TerminalNode = &TerminalNodeImpl{} + +func NewTerminalNodeImpl(symbol Token) *TerminalNodeImpl { + tn := new(TerminalNodeImpl) + + tn.parentCtx = nil + tn.symbol = symbol + + return tn +} + +func (t *TerminalNodeImpl) GetChild(i int) Tree { + return nil +} + +func (t *TerminalNodeImpl) GetChildren() []Tree { + return nil +} + +func (t *TerminalNodeImpl) SetChildren(tree []Tree) { + panic("Cannot set children on terminal node") +} + +func (t *TerminalNodeImpl) GetSymbol() Token { + return t.symbol +} + +func (t *TerminalNodeImpl) GetParent() Tree { + return t.parentCtx +} + +func (t *TerminalNodeImpl) SetParent(tree Tree) { + t.parentCtx = tree.(RuleContext) +} + +func (t *TerminalNodeImpl) GetPayload() interface{} { + return t.symbol +} + +func (t *TerminalNodeImpl) GetSourceInterval() *Interval { + if t.symbol == nil { + return TreeInvalidInterval + } + tokenIndex := t.symbol.GetTokenIndex() + return NewInterval(tokenIndex, tokenIndex) +} + +func (t *TerminalNodeImpl) GetChildCount() int { + return 0 +} + +func (t *TerminalNodeImpl) Accept(v ParseTreeVisitor) interface{} { + return v.VisitTerminal(t) +} + +func (t *TerminalNodeImpl) GetText() string { + return t.symbol.GetText() +} + +func (t *TerminalNodeImpl) String() string { + if t.symbol.GetTokenType() == TokenEOF { + return "" + } + + return t.symbol.GetText() +} + +func (t *TerminalNodeImpl) ToStringTree(s []string, r Recognizer) string { + return t.String() +} + +// Represents a token that was consumed during reSynchronization +// rather than during a valid Match operation. For example, +// we will create this kind of a node during single token insertion +// and deletion as well as during "consume until error recovery set" +// upon no viable alternative exceptions. + +type ErrorNodeImpl struct { + *TerminalNodeImpl +} + +var _ ErrorNode = &ErrorNodeImpl{} + +func NewErrorNodeImpl(token Token) *ErrorNodeImpl { + en := new(ErrorNodeImpl) + en.TerminalNodeImpl = NewTerminalNodeImpl(token) + return en +} + +func (e *ErrorNodeImpl) errorNode() {} + +func (e *ErrorNodeImpl) Accept(v ParseTreeVisitor) interface{} { + return v.VisitErrorNode(e) +} + +type ParseTreeWalker struct { +} + +func NewParseTreeWalker() *ParseTreeWalker { + return new(ParseTreeWalker) +} + +// Performs a walk on the given parse tree starting at the root and going down recursively +// with depth-first search. On each node, EnterRule is called before +// recursively walking down into child nodes, then +// ExitRule is called after the recursive call to wind up. +func (p *ParseTreeWalker) Walk(listener ParseTreeListener, t Tree) { + switch tt := t.(type) { + case ErrorNode: + listener.VisitErrorNode(tt) + case TerminalNode: + listener.VisitTerminal(tt) + default: + p.EnterRule(listener, t.(RuleNode)) + for i := 0; i < t.GetChildCount(); i++ { + child := t.GetChild(i) + p.Walk(listener, child) + } + p.ExitRule(listener, t.(RuleNode)) + } +} + +// Enters a grammar rule by first triggering the generic event {@link ParseTreeListener//EnterEveryRule} +// then by triggering the event specific to the given parse tree node +func (p *ParseTreeWalker) EnterRule(listener ParseTreeListener, r RuleNode) { + ctx := r.GetRuleContext().(ParserRuleContext) + listener.EnterEveryRule(ctx) + ctx.EnterRule(listener) +} + +// Exits a grammar rule by first triggering the event specific to the given parse tree node +// then by triggering the generic event {@link ParseTreeListener//ExitEveryRule} +func (p *ParseTreeWalker) ExitRule(listener ParseTreeListener, r RuleNode) { + ctx := r.GetRuleContext().(ParserRuleContext) + ctx.ExitRule(listener) + listener.ExitEveryRule(ctx) +} + +var ParseTreeWalkerDefault = NewParseTreeWalker() diff --git a/runtime/Go/antlr/v4/trees.go b/runtime/Go/antlr/v4/trees.go new file mode 100644 index 0000000000..d7dbb03228 --- /dev/null +++ b/runtime/Go/antlr/v4/trees.go @@ -0,0 +1,138 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import "fmt" + +/** A set of utility routines useful for all kinds of ANTLR trees. */ + +// Print out a whole tree in LISP form. {@link //getNodeText} is used on the +// +// node payloads to get the text for the nodes. Detect +// parse trees and extract data appropriately. +func TreesStringTree(tree Tree, ruleNames []string, recog Recognizer) string { + + if recog != nil { + ruleNames = recog.GetRuleNames() + } + + s := TreesGetNodeText(tree, ruleNames, nil) + + s = EscapeWhitespace(s, false) + c := tree.GetChildCount() + if c == 0 { + return s + } + res := "(" + s + " " + if c > 0 { + s = TreesStringTree(tree.GetChild(0), ruleNames, nil) + res += s + } + for i := 1; i < c; i++ { + s = TreesStringTree(tree.GetChild(i), ruleNames, nil) + res += (" " + s) + } + res += ")" + return res +} + +func TreesGetNodeText(t Tree, ruleNames []string, recog Parser) string { + if recog != nil { + ruleNames = recog.GetRuleNames() + } + + if ruleNames != nil { + switch t2 := t.(type) { + case RuleNode: + t3 := t2.GetRuleContext() + altNumber := t3.GetAltNumber() + + if altNumber != ATNInvalidAltNumber { + return fmt.Sprintf("%s:%d", ruleNames[t3.GetRuleIndex()], altNumber) + } + return ruleNames[t3.GetRuleIndex()] + case ErrorNode: + return fmt.Sprint(t2) + case TerminalNode: + if t2.GetSymbol() != nil { + return t2.GetSymbol().GetText() + } + } + } + + // no recog for rule names + payload := t.GetPayload() + if p2, ok := payload.(Token); ok { + return p2.GetText() + } + + return fmt.Sprint(t.GetPayload()) +} + +// Return ordered list of all children of this node +func TreesGetChildren(t Tree) []Tree { + list := make([]Tree, 0) + for i := 0; i < t.GetChildCount(); i++ { + list = append(list, t.GetChild(i)) + } + return list +} + +// Return a list of all ancestors of this node. The first node of +// +// list is the root and the last is the parent of this node. +func TreesgetAncestors(t Tree) []Tree { + ancestors := make([]Tree, 0) + t = t.GetParent() + for t != nil { + f := []Tree{t} + ancestors = append(f, ancestors...) + t = t.GetParent() + } + return ancestors +} + +func TreesFindAllTokenNodes(t ParseTree, ttype int) []ParseTree { + return TreesfindAllNodes(t, ttype, true) +} + +func TreesfindAllRuleNodes(t ParseTree, ruleIndex int) []ParseTree { + return TreesfindAllNodes(t, ruleIndex, false) +} + +func TreesfindAllNodes(t ParseTree, index int, findTokens bool) []ParseTree { + nodes := make([]ParseTree, 0) + treesFindAllNodes(t, index, findTokens, &nodes) + return nodes +} + +func treesFindAllNodes(t ParseTree, index int, findTokens bool, nodes *[]ParseTree) { + // check this node (the root) first + + t2, ok := t.(TerminalNode) + t3, ok2 := t.(ParserRuleContext) + + if findTokens && ok { + if t2.GetSymbol().GetTokenType() == index { + *nodes = append(*nodes, t2) + } + } else if !findTokens && ok2 { + if t3.GetRuleIndex() == index { + *nodes = append(*nodes, t3) + } + } + // check children + for i := 0; i < t.GetChildCount(); i++ { + treesFindAllNodes(t.GetChild(i).(ParseTree), index, findTokens, nodes) + } +} + +func TreesDescendants(t ParseTree) []ParseTree { + nodes := []ParseTree{t} + for i := 0; i < t.GetChildCount(); i++ { + nodes = append(nodes, TreesDescendants(t.GetChild(i).(ParseTree))...) + } + return nodes +} diff --git a/runtime/Go/antlr/v4/utils.go b/runtime/Go/antlr/v4/utils.go new file mode 100644 index 0000000000..9fad5d916b --- /dev/null +++ b/runtime/Go/antlr/v4/utils.go @@ -0,0 +1,352 @@ +// Copyright (c) 2012-2022 The ANTLR Project. All rights reserved. +// Use of this file is governed by the BSD 3-clause license that +// can be found in the LICENSE.txt file in the project root. + +package antlr + +import ( + "bytes" + "errors" + "fmt" + "math/bits" + "strconv" + "strings" +) + +func intMin(a, b int) int { + if a < b { + return a + } + return b +} + +func intMax(a, b int) int { + if a > b { + return a + } + return b +} + +// A simple integer stack + +type IntStack []int + +var ErrEmptyStack = errors.New("Stack is empty") + +func (s *IntStack) Pop() (int, error) { + l := len(*s) - 1 + if l < 0 { + return 0, ErrEmptyStack + } + v := (*s)[l] + *s = (*s)[0:l] + return v, nil +} + +func (s *IntStack) Push(e int) { + *s = append(*s, e) +} + +type comparable interface { + Equals(other Collectable[any]) bool +} + +func standardEqualsFunction(a Collectable[any], b Collectable[any]) bool { + + return a.Equals(b) +} + +func standardHashFunction(a interface{}) int { + if h, ok := a.(hasher); ok { + return h.Hash() + } + + panic("Not Hasher") +} + +type hasher interface { + Hash() int +} + +const bitsPerWord = 64 + +func indexForBit(bit int) int { + return bit / bitsPerWord +} + +func wordForBit(data []uint64, bit int) uint64 { + idx := indexForBit(bit) + if idx >= len(data) { + return 0 + } + return data[idx] +} + +func maskForBit(bit int) uint64 { + return uint64(1) << (bit % bitsPerWord) +} + +func wordsNeeded(bit int) int { + return indexForBit(bit) + 1 +} + +type BitSet struct { + data []uint64 +} + +func NewBitSet() *BitSet { + return &BitSet{} +} + +func (b *BitSet) add(value int) { + idx := indexForBit(value) + if idx >= len(b.data) { + size := wordsNeeded(value) + data := make([]uint64, size) + copy(data, b.data) + b.data = data + } + b.data[idx] |= maskForBit(value) +} + +func (b *BitSet) clear(index int) { + idx := indexForBit(index) + if idx >= len(b.data) { + return + } + b.data[idx] &= ^maskForBit(index) +} + +func (b *BitSet) or(set *BitSet) { + // Get min size necessary to represent the bits in both sets. + bLen := b.minLen() + setLen := set.minLen() + maxLen := intMax(bLen, setLen) + if maxLen > len(b.data) { + // Increase the size of len(b.data) to repesent the bits in both sets. + data := make([]uint64, maxLen) + copy(data, b.data) + b.data = data + } + // len(b.data) is at least setLen. + for i := 0; i < setLen; i++ { + b.data[i] |= set.data[i] + } +} + +func (b *BitSet) remove(value int) { + b.clear(value) +} + +func (b *BitSet) contains(value int) bool { + idx := indexForBit(value) + if idx >= len(b.data) { + return false + } + return (b.data[idx] & maskForBit(value)) != 0 +} + +func (b *BitSet) minValue() int { + for i, v := range b.data { + if v == 0 { + continue + } + return i*bitsPerWord + bits.TrailingZeros64(v) + } + return 2147483647 +} + +func (b *BitSet) equals(other interface{}) bool { + otherBitSet, ok := other.(*BitSet) + if !ok { + return false + } + + if b == otherBitSet { + return true + } + + // We only compare set bits, so we cannot rely on the two slices having the same size. Its + // possible for two BitSets to have different slice lengths but the same set bits. So we only + // compare the relevant words and ignore the trailing zeros. + bLen := b.minLen() + otherLen := otherBitSet.minLen() + + if bLen != otherLen { + return false + } + + for i := 0; i < bLen; i++ { + if b.data[i] != otherBitSet.data[i] { + return false + } + } + + return true +} + +func (b *BitSet) minLen() int { + for i := len(b.data); i > 0; i-- { + if b.data[i-1] != 0 { + return i + } + } + return 0 +} + +func (b *BitSet) length() int { + cnt := 0 + for _, val := range b.data { + cnt += bits.OnesCount64(val) + } + return cnt +} + +func (b *BitSet) String() string { + vals := make([]string, 0, b.length()) + + for i, v := range b.data { + for v != 0 { + n := bits.TrailingZeros64(v) + vals = append(vals, strconv.Itoa(i*bitsPerWord+n)) + v &= ^(uint64(1) << n) + } + } + + return "{" + strings.Join(vals, ", ") + "}" +} + +type AltDict struct { + data map[string]interface{} +} + +func NewAltDict() *AltDict { + d := new(AltDict) + d.data = make(map[string]interface{}) + return d +} + +func (a *AltDict) Get(key string) interface{} { + key = "k-" + key + return a.data[key] +} + +func (a *AltDict) put(key string, value interface{}) { + key = "k-" + key + a.data[key] = value +} + +func (a *AltDict) values() []interface{} { + vs := make([]interface{}, len(a.data)) + i := 0 + for _, v := range a.data { + vs[i] = v + i++ + } + return vs +} + +type DoubleDict struct { + data map[int]map[int]interface{} +} + +func NewDoubleDict() *DoubleDict { + dd := new(DoubleDict) + dd.data = make(map[int]map[int]interface{}) + return dd +} + +func (d *DoubleDict) Get(a, b int) interface{} { + data := d.data[a] + + if data == nil { + return nil + } + + return data[b] +} + +func (d *DoubleDict) set(a, b int, o interface{}) { + data := d.data[a] + + if data == nil { + data = make(map[int]interface{}) + d.data[a] = data + } + + data[b] = o +} + +func EscapeWhitespace(s string, escapeSpaces bool) string { + + s = strings.Replace(s, "\t", "\\t", -1) + s = strings.Replace(s, "\n", "\\n", -1) + s = strings.Replace(s, "\r", "\\r", -1) + if escapeSpaces { + s = strings.Replace(s, " ", "\u00B7", -1) + } + return s +} + +func TerminalNodeToStringArray(sa []TerminalNode) []string { + st := make([]string, len(sa)) + + for i, s := range sa { + st[i] = fmt.Sprintf("%v", s) + } + + return st +} + +func PrintArrayJavaStyle(sa []string) string { + var buffer bytes.Buffer + + buffer.WriteString("[") + + for i, s := range sa { + buffer.WriteString(s) + if i != len(sa)-1 { + buffer.WriteString(", ") + } + } + + buffer.WriteString("]") + + return buffer.String() +} + +// murmur hash +func murmurInit(seed int) int { + return seed +} + +func murmurUpdate(h int, value int) int { + const c1 uint32 = 0xCC9E2D51 + const c2 uint32 = 0x1B873593 + const r1 uint32 = 15 + const r2 uint32 = 13 + const m uint32 = 5 + const n uint32 = 0xE6546B64 + + k := uint32(value) + k *= c1 + k = (k << r1) | (k >> (32 - r1)) + k *= c2 + + hash := uint32(h) ^ k + hash = (hash << r2) | (hash >> (32 - r2)) + hash = hash*m + n + return int(hash) +} + +func murmurFinish(h int, numberOfWords int) int { + var hash = uint32(h) + hash ^= uint32(numberOfWords) << 2 + hash ^= hash >> 16 + hash *= 0x85ebca6b + hash ^= hash >> 13 + hash *= 0xc2b2ae35 + hash ^= hash >> 16 + + return int(hash) +} diff --git a/runtime/Go/antlr/v4/utils_set.go b/runtime/Go/antlr/v4/utils_set.go new file mode 100644 index 0000000000..c9bd6751e3 --- /dev/null +++ b/runtime/Go/antlr/v4/utils_set.go @@ -0,0 +1,235 @@ +package antlr + +import "math" + +const ( + _initalCapacity = 16 + _initalBucketCapacity = 8 + _loadFactor = 0.75 +) + +type Set interface { + Add(value interface{}) (added interface{}) + Len() int + Get(value interface{}) (found interface{}) + Contains(value interface{}) bool + Values() []interface{} + Each(f func(interface{}) bool) +} + +type array2DHashSet struct { + buckets [][]Collectable[any] + hashcodeFunction func(interface{}) int + equalsFunction func(Collectable[any], Collectable[any]) bool + + n int // How many elements in set + threshold int // when to expand + + currentPrime int // jump by 4 primes each expand or whatever + initialBucketCapacity int +} + +func (as *array2DHashSet) Each(f func(interface{}) bool) { + if as.Len() < 1 { + return + } + + for _, bucket := range as.buckets { + for _, o := range bucket { + if o == nil { + break + } + if !f(o) { + return + } + } + } +} + +func (as *array2DHashSet) Values() []interface{} { + if as.Len() < 1 { + return nil + } + + values := make([]interface{}, 0, as.Len()) + as.Each(func(i interface{}) bool { + values = append(values, i) + return true + }) + return values +} + +func (as *array2DHashSet) Contains(value Collectable[any]) bool { + return as.Get(value) != nil +} + +func (as *array2DHashSet) Add(value Collectable[any]) interface{} { + if as.n > as.threshold { + as.expand() + } + return as.innerAdd(value) +} + +func (as *array2DHashSet) expand() { + old := as.buckets + + as.currentPrime += 4 + + var ( + newCapacity = len(as.buckets) << 1 + newTable = as.createBuckets(newCapacity) + newBucketLengths = make([]int, len(newTable)) + ) + + as.buckets = newTable + as.threshold = int(float64(newCapacity) * _loadFactor) + + for _, bucket := range old { + if bucket == nil { + continue + } + + for _, o := range bucket { + if o == nil { + break + } + + b := as.getBuckets(o) + bucketLength := newBucketLengths[b] + var newBucket []Collectable[any] + if bucketLength == 0 { + // new bucket + newBucket = as.createBucket(as.initialBucketCapacity) + newTable[b] = newBucket + } else { + newBucket = newTable[b] + if bucketLength == len(newBucket) { + // expand + newBucketCopy := make([]Collectable[any], len(newBucket)<<1) + copy(newBucketCopy[:bucketLength], newBucket) + newBucket = newBucketCopy + newTable[b] = newBucket + } + } + + newBucket[bucketLength] = o + newBucketLengths[b]++ + } + } +} + +func (as *array2DHashSet) Len() int { + return as.n +} + +func (as *array2DHashSet) Get(o Collectable[any]) interface{} { + if o == nil { + return nil + } + + b := as.getBuckets(o) + bucket := as.buckets[b] + if bucket == nil { // no bucket + return nil + } + + for _, e := range bucket { + if e == nil { + return nil // empty slot; not there + } + if as.equalsFunction(e, o) { + return e + } + } + + return nil +} + +func (as *array2DHashSet) innerAdd(o Collectable[any]) interface{} { + b := as.getBuckets(o) + + bucket := as.buckets[b] + + // new bucket + if bucket == nil { + bucket = as.createBucket(as.initialBucketCapacity) + bucket[0] = o + + as.buckets[b] = bucket + as.n++ + return o + } + + // look for it in bucket + for i := 0; i < len(bucket); i++ { + existing := bucket[i] + if existing == nil { // empty slot; not there, add. + bucket[i] = o + as.n++ + return o + } + + if as.equalsFunction(existing, o) { // found existing, quit + return existing + } + } + + // full bucket, expand and add to end + oldLength := len(bucket) + bucketCopy := make([]Collectable[any], oldLength<<1) + copy(bucketCopy[:oldLength], bucket) + bucket = bucketCopy + as.buckets[b] = bucket + bucket[oldLength] = o + as.n++ + return o +} + +func (as *array2DHashSet) getBuckets(value Collectable[any]) int { + hash := as.hashcodeFunction(value) + return hash & (len(as.buckets) - 1) +} + +func (as *array2DHashSet) createBuckets(cap int) [][]Collectable[any] { + return make([][]Collectable[any], cap) +} + +func (as *array2DHashSet) createBucket(cap int) []Collectable[any] { + return make([]Collectable[any], cap) +} + +func newArray2DHashSetWithCap( + hashcodeFunction func(interface{}) int, + equalsFunction func(Collectable[any], Collectable[any]) bool, + initCap int, + initBucketCap int, +) *array2DHashSet { + if hashcodeFunction == nil { + hashcodeFunction = standardHashFunction + } + + if equalsFunction == nil { + equalsFunction = standardEqualsFunction + } + + ret := &array2DHashSet{ + hashcodeFunction: hashcodeFunction, + equalsFunction: equalsFunction, + + n: 0, + threshold: int(math.Floor(_initalCapacity * _loadFactor)), + + currentPrime: 1, + initialBucketCapacity: initBucketCap, + } + + ret.buckets = ret.createBuckets(initCap) + return ret +} + +func newArray2DHashSet( + hashcodeFunction func(interface{}) int, + equalsFunction func(Collectable[any], Collectable[any]) bool, +) *array2DHashSet { + return newArray2DHashSetWithCap(hashcodeFunction, equalsFunction, _initalCapacity, _initalBucketCapacity) +} diff --git a/runtime/Go/antlr/v4/utils_test.go b/runtime/Go/antlr/v4/utils_test.go new file mode 100644 index 0000000000..ed274ef339 --- /dev/null +++ b/runtime/Go/antlr/v4/utils_test.go @@ -0,0 +1,62 @@ +package antlr + +import "testing" + +func testBitSet(t *testing.T, bs *BitSet, str string, length int, contains []int, minValue int, minLen int) { + t.Helper() + if got, want := bs.String(), str; got != want { + t.Errorf("%+v.String() = %q, want %q", bs, got, want) + } + if got, want := bs.length(), length; got != want { + t.Errorf("%+v.length() = %q, want %q", bs, got, want) + } + for i := 0; i < len(bs.data)*bitsPerWord; i++ { + var want bool + for _, val := range contains { + if i == val { + want = true + break + } + } + if got := bs.contains(i); got != want { + t.Errorf("%+v.contains(%v) = %v, want %v", bs, i, got, want) + } + } + if got, want := bs.minValue(), minValue; got != want { + t.Errorf("%+v.minValue() = %v, want %v", bs, got, want) + } + if got, want := bs.minLen(), minLen; got != want { + t.Errorf("%+v.minLen() = %v, want %v", bs, got, want) + } +} + +func TestBitSet(t *testing.T) { + bs1 := NewBitSet() + testBitSet(t, bs1, "{}", 0, []int{}, 2147483647, 0) + bs1.add(0) + testBitSet(t, bs1, "{0}", 1, []int{0}, 0, 1) + bs1.add(63) + testBitSet(t, bs1, "{0, 63}", 2, []int{0, 63}, 0, 1) + bs1.remove(0) + testBitSet(t, bs1, "{63}", 1, []int{63}, 63, 1) + bs1.add(20) + testBitSet(t, bs1, "{20, 63}", 2, []int{20, 63}, 20, 1) + bs1.clear(63) + testBitSet(t, bs1, "{20}", 1, []int{20}, 20, 1) + bs2 := NewBitSet() + bs2.add(64) + bs1.or(bs2) + testBitSet(t, bs1, "{20, 64}", 2, []int{20, 64}, 20, 2) + bs1.remove(20) + testBitSet(t, bs1, "{64}", 1, []int{64}, 64, 2) + bs3 := NewBitSet() + bs3.add(63) + bs1.or(bs3) + testBitSet(t, bs1, "{63, 64}", 2, []int{63, 64}, 63, 2) + bs1.clear(64) + bs4 := NewBitSet() + bs4.or(bs1) + if got, want := bs4.equals(bs1), true; got != want { + t.Errorf("%+v.equals(%+v) = %v, want %v", bs4, bs1, got, want) + } +} From b9d975a73fef4c6c9a59607274ea998a85799c3d Mon Sep 17 00:00:00 2001 From: "Jim.Idle" Date: Wed, 31 Aug 2022 17:13:02 +0800 Subject: [PATCH 3/3] feat: Create the v4 runtime layout for the Go runtime, ready for release tagging Note that the vast majority of the changes here are just copying the runtime file in to the /v4 subdirectory so that we can support legacy projects that use GOPATH only, as well as users that can use go modules. At a later release, we will delete the default path, and move the v4 subdirectory back to the top level. But, we cannot do that on this release. Signed-off-by: Jim.Idle --- .gitignore | 20 +++ doc/go-target.md | 148 ++++++++++++++++-- .../antlr/v4/test/runtime/helpers/Test.go.stg | 2 +- .../antlr/v4/test/runtime/go/GoRunner.java | 8 +- runtime/Go/antlr/v4/antlrdoc.go | 2 +- runtime/Go/antlr/v4/go.sum | 2 + .../antlr/v4/tool/templates/codegen/Go/Go.stg | 12 +- 7 files changed, 163 insertions(+), 31 deletions(-) create mode 100644 runtime/Go/antlr/v4/go.sum diff --git a/.gitignore b/.gitignore index 00040c195d..f517fa3f88 100644 --- a/.gitignore +++ b/.gitignore @@ -104,3 +104,23 @@ runtime/PHP # Swift binaries .build/ + +# Cpp generated build files +runtime/Cpp/CMakeCache.txt +runtime/Cpp/CMakeFiles/ +runtime/Cpp/CPackConfig.cmake +runtime/Cpp/CPackSourceConfig.cmake +runtime/Cpp/CTestTestfile.cmake +runtime/Cpp/Makefile +runtime/Cpp/_deps/ +runtime/Cpp/cmake_install.cmake +runtime/Cpp/runtime/CMakeFiles/ +runtime/Cpp/runtime/CTestTestfile.cmake +runtime/Cpp/runtime/Makefile +runtime/Cpp/runtime/antlr4_tests +runtime/Cpp/runtime/antlr4_tests\[1]_include.cmake +runtime/Cpp/runtime/antlr4_tests\[1]_tests.cmake +runtime/Cpp/runtime/cmake_install.cmake +runtime/Cpp/runtime/libantlr4-runtime.4.10.1.dylib +runtime/Cpp/runtime/libantlr4-runtime.a +runtime/Cpp/runtime/libantlr4-runtime.dylib diff --git a/doc/go-target.md b/doc/go-target.md index 6bdac133b3..b85c39cc65 100644 --- a/doc/go-target.md +++ b/doc/go-target.md @@ -8,32 +8,102 @@ #### 2. Get the Go ANTLR runtime -Each target language for ANTLR has a runtime package for running parser generated by ANTLR4. The runtime provides a common set of tools for using your parser. +Each target language for ANTLR has a runtime package for running parser generated by ANTLR4. +The runtime provides a common set of tools for using your parser. Note that if you have existing projects and have +yet to replace the `v1.x.x` modules with the `v4` modules, then you can skip ahead to the section *Upgrading to v4 +from earlier versions* -Get the runtime and install it on your GOPATH: +The Go runtime uses modules and has a version path of `/v4` to stay in sync with the runtime versions of all the other runtimes. +Setup is the same as any other module based project: ```bash -go get github.com/antlr/antlr4/runtime/Go/antlr +$ cd mymodproject +$ go mod init mymodproject ``` -#### 3. Set the release tag (optional) +After which, you can use go get, to get the latest release version of the ANTLR v4 runtime using: -`go get` has no native way to specify a branch or commit. So, when you run it, you'll download the latest commits. This may or may not be your preference. +```bash +go get github.com/antlr/antlr4/runtime/Go/antlr/v4 +``` -You'll need to use git to set the release. For example, to set the release tag for release 4.9.3: +If your project is already using the v4 runtime, then you can upgrade to the latest release using the usual: ```bash -cd $GOPATH/src/github.com/antlr/antlr4 # enter the antlr4 source directory -git checkout tags/4.9.3 # the go runtime was added in release 4.9.3 +go get -u github.com/antlr/antlr4/runtime/Go/antlr/v4 +``` +If you have not yet upgraded existing projects to the `/v4` module path, consult the section *Upgrading to v4 +from earlier versions* + +The ANTLR runtime has only one external dependency, and that is part of the go system itself: + +``` +golang.org/x/exp +``` + +A complete list of releases can be found on [the release page](https://github.com/antlr/antlr4/releases). The Go +runtime will be tagged using standard Go tags, so release 4.11.0 will be tagged with `v4.11.0` and go get will pick +that up from the ANTLR repo. + +#### 3. Configuring `go generate` in your project + +In order to promote the use of repeatable builds, it is often useful to add the latest tool jar to your project's +repo and configure a `generate.sh` and `generate.go` file. You can of course globally alias the java command required to run the +tool. Your own CI and dev environment will guide you. + +Here is how you can configure `go generate` for your project, assuming that you follow the general recommendation to +place the ANTLR grammar files in their own package in your project structure. Here is a general template as a starting point: + +``` + . + ├── myproject + ├── parser + │ ├── mygrammar.g4 + │ ├── antlr-4.11.0-complete.jar + │ ├── error_listeners.go + │ ├── generate.go + │ ├── generate.sh + ├── go.mod + ├── go.sum + ├── main.go + └── main_test.go +``` + +Make sure that the package statement in your grammar file(s) reflects the go package they exist in. +The `generate.go` file then looks like this: + +```golang + package parser + + //go:generate ./generate.sh +``` + +And the `generate.sh` file will look similar to this: + +```shell + + #!/bin/sh + + alias antlr4='java -Xmx500M -cp "./antlr4-4.11.0-complete.jar:$CLASSPATH" org.antlr.v4.Tool' + antlr4 -Dlanguage=Go -no-visitor -package parser *.g4 +``` + +From the command line at the root of your package “myproject” you can then simply issue the command: + +```shell + go generate ./... ``` -A complete list of releases can be found on [the release page](https://github.com/antlr/antlr4/releases). +If you have not yet run a `go get`, you can now run `go mod tidy` and update your -#### 4. Generate your parser +#### 4. Generate your parser manually You use the ANTLR4 "tool" to generate a parser. These will reference the ANTLR runtime, installed above. -Suppose you're using a UNIX system and have set up an alias for the ANTLR4 tool as described in [the getting started guide](getting-started.md). To generate your go parser, you'll need to invoke: +Suppose you're using a UNIX system and have set up an alias for the ANTLR4 tool as described in +[the getting started guide](getting-started.md). + +To generate your go parser, you'll need to invoke: ```bash antlr4 -Dlanguage=Go MyGrammar.g4 @@ -41,17 +111,59 @@ antlr4 -Dlanguage=Go MyGrammar.g4 For a full list of antlr4 tool options, please visit the [tool documentation page](tool-options.md). +### Upgrading to `/v4` from the default path + +*NB: While switch to new module path would normally imply that the public interface for the runtime has changed, this is +not actually the case - you will not need to change your existing code to upgrade. The main point of the path change is so +that git tagging works with the ANTLR Go runtime.* + +Prior to release v4.11.0 the Go runtime shipped with a module but the module had no version path. This meant that +the tags in the ANTLR repo did not work, as any tag above `v1` must refer to a matching module path. +So the command `go get github.com/antlr/antlr4/runtime/Go/antlr` would just bring in +whatever was the `HEAD` of the master branch. While this *kind of* worked, it is obviously subject to problems and does +not fit properly with the idiomatic ways of Go. + +As of v4.11.0 the module path for the Go runtime is properly in sync with the repo tags. However, this means you need to +perform a few simple actions in order to upgrade to the `/v4` path. + + - Firstly, make sure that you are using an ANTLR tool jar with a version number of 4.11.0 or greater. + - Next you replace any mention of the old (default) path to ANTLR in your go source files. Don't worry that this will +modify your generated files as... + - Now regenerate your grammar files either manually or using `go generate ./...` (see above) + +A quick way to replace original module path references is to use this script from your module's base directory: + +```shell +find . -type f \ + -name '*.go' \ + -exec sed -i -e 's,github.com/antlr/antlr4/runtime/Go/antlr,github.com/antlr/antlr4/runtime/Go/antlr/v4,g' {} \; +``` +After performing the steps above, issuing: + +```shell +go mod tidy +``` +Should fix up your `go.mod` file to reference only the `v4` version of the ANTLR Go runtime: + +```shell +require github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.11.0-xxxxxx-xxxxxxxxx +``` + +From this point on, your go mod commands will work correctly with the ANTLR repo and upgrades and downgrades will work +as you expect. As will branch version such as @dev + ### Referencing the Go ANTLR runtime You can reference the go ANTLR runtime package like this: -```go -import "github.com/antlr/antlr4/runtime/Go/antlr" +```golang +import "github.com/antlr/antlr4/runtime/Go/antlr/v4" ``` ### Complete example -Suppose you're using the JSON grammar from https://github.com/antlr/grammars-v4/tree/master/json. +Suppose you're using the JSON grammar from https://github.com/antlr/grammars-v4/tree/master/json placed in the parser +directory and have initialized your `go mod` file. Then, invoke `antlr4 -Dlanguage=Go JSON.g4`. The result of this is a collection of .go files in the `parser` directory including: ``` @@ -61,15 +173,17 @@ json_lexer.go json_listener.go ``` -Another common option to the ANTLR tool is `-visitor`, which generates a parse tree visitor, but we won't be doing that here. For a full list of antlr4 tool options, please visit the [tool documentation page](tool-options.md). +Another common option to the ANTLR tool is `-visitor`, which generates a parse tree visitor, but we won't be doing that here. +For a full list of antlr4 tool options, please visit the [tool documentation page](tool-options.md). -We'll write a small main func to call the generated parser/lexer (assuming they are separate). This one writes out the encountered `ParseTreeContext`'s. Suppose the gen'ed parser code is in the `parser` directory relative to this code: +We'll write a small main func to call the generated parser/lexer (assuming they are separate). This one writes out the +encountered `ParseTreeContext`'s. Assuming the generated parser code is in the `parser` directory relative to this code: ```golang package main import ( - "github.com/antlr/antlr4/runtime/Go/antlr" + "github.com/antlr/antlr4/runtime/Go/antlr/v4" "./parser" "os" "fmt" diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/helpers/Test.go.stg b/runtime-testsuite/resources/org/antlr/v4/test/runtime/helpers/Test.go.stg index 184cc2b4f7..d8b479b977 100644 --- a/runtime-testsuite/resources/org/antlr/v4/test/runtime/helpers/Test.go.stg +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/helpers/Test.go.stg @@ -1,7 +1,7 @@ package main import ( "test/parser" - "github.com/antlr/antlr4/runtime/Go/antlr" + "github.com/antlr/antlr4/runtime/Go/antlr/v4" "fmt" "os" ) diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/go/GoRunner.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/go/GoRunner.java index e04d161a8e..06f023d378 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/go/GoRunner.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/go/GoRunner.java @@ -65,7 +65,7 @@ public String[] getExtraRunArgs() { private final static String antlrTestPackageName = "antlr"; private static final String goModFileName = "go.mod"; - private static final String GoRuntimeImportPath = "github.com/antlr/antlr4/runtime/Go/antlr"; + private static final String GoRuntimeImportPath = "github.com/antlr/antlr4/runtime/Go/antlr/v4"; private static final Path packageBase; private static final String packageBaseString; private static String goModContent = null; @@ -90,10 +90,6 @@ protected String grammarParseRuleToRecognizerName(String startRuleName) { @Override protected void initRuntime() throws Exception { - - Path packageDir = Paths.get(packageBaseString, "src", antlrTestPackageName); - Path runtimeBase = Paths.get(getRuntimePath("Go"), "antlr"); - // As we are using modules for the tests, we can use the `replace` directive to point // the tests at the runtime we are testing. We could use GOWORK, but then we would // have to remove each test module and add in the current one. So, we turn GOWORK off @@ -118,7 +114,7 @@ protected CompiledState compile(RunOptions runOptions, GeneratedState generatedS List generatedFiles = generatedState.generatedFiles; String tempDirPath = getTempDirPath(); - Path runtimeFiles = Paths.get(getRuntimePath("Go"), "antlr"); + Path runtimeFiles = Paths.get(getRuntimePath("Go"), "antlr/v4"); File generatedFir = new File(tempDirPath, "parser"); if (!generatedFir.mkdir()) { return new CompiledState(generatedState, new Exception("can't make dir " + generatedFir)); diff --git a/runtime/Go/antlr/v4/antlrdoc.go b/runtime/Go/antlr/v4/antlrdoc.go index 4d7825f965..aa3a5b363d 100644 --- a/runtime/Go/antlr/v4/antlrdoc.go +++ b/runtime/Go/antlr/v4/antlrdoc.go @@ -48,7 +48,7 @@ And the generate.sh file will look similar to this: #!/bin/sh alias antlr4='java -Xmx500M -cp "./antlr4-4.11.0-complete.jar:$CLASSPATH" org.antlr.v4.Tool' - antlr4 -Dlanguage=Go -no-visitor -package tgram *.g4 + antlr4 -Dlanguage=Go -no-visitor -package parser *.g4 depending on whether you want visitors or listeners or any other ANTLR options. diff --git a/runtime/Go/antlr/v4/go.sum b/runtime/Go/antlr/v4/go.sum new file mode 100644 index 0000000000..2b05f22a47 --- /dev/null +++ b/runtime/Go/antlr/v4/go.sum @@ -0,0 +1,2 @@ +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA= +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA= diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg index 15df5efbf4..613d994d43 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Go/Go.stg @@ -16,7 +16,7 @@ import ( "strconv" "sync" - "github.com/antlr/antlr4/runtime/Go/antlr" + "github.com/antlr/antlr4/runtime/Go/antlr/v4" ) @@ -45,7 +45,7 @@ package // package parser // -import "github.com/antlr/antlr4/runtime/Go/antlr" +import "github.com/antlr/antlr4/runtime/Go/antlr/v4" // Listener is a complete listener for a parse tree produced by . type Listener interface { @@ -69,7 +69,7 @@ package // package parser // -import "github.com/antlr/antlr4/runtime/Go/antlr" +import "github.com/antlr/antlr4/runtime/Go/antlr/v4" // BaseListener is a complete listener for a parse tree produced by . type BaseListener struct{} @@ -105,7 +105,7 @@ package // package parser // -import "github.com/antlr/antlr4/runtime/Go/antlr" +import "github.com/antlr/antlr4/runtime/Go/antlr/v4"
@@ -131,7 +131,7 @@ package // package parser // -import "github.com/antlr/antlr4/runtime/Go/antlr" +import "github.com/antlr/antlr4/runtime/Go/antlr/v4" type BaseVisitor struct { *antlr.BaseParseTreeVisitor @@ -1401,7 +1401,7 @@ import ( "sync" "unicode" - "github.com/antlr/antlr4/runtime/Go/antlr" + "github.com/antlr/antlr4/runtime/Go/antlr/v4" )