diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java index 5932cb6618..ad53c0360a 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java @@ -401,4 +401,39 @@ public void testLabelsForTokensWithMixedTypesLRWithoutLabels() { testErrors(test, false); } + + @Test public void testUnreachableTokens() { + String[] test = { + "lexer grammar Test;\n" + + "TOKEN1: 'as' 'df' | 'qwer';\n" + + "TOKEN2: [0-9];\n" + + "TOKEN3: 'asdf';\n" + + "TOKEN4: 'q' 'w' 'e' 'r' | A;\n" + + "TOKEN5: 'aaaa';\n" + + "TOKEN6: 'asdf';\n" + + "TOKEN7: 'qwer'+;\n" + + "TOKEN8: 'a' 'b' | 'b' | 'a' 'b';\n" + + "fragment\n" + + "TOKEN9: 'asdf' | 'qwer' | 'qwer';\n" + + "TOKEN10: '\\r\\n' | '\\r\\n';\n" + + "TOKEN11: '\\r\\n';\n" + + "\n" + + "mode MODE1;\n" + + "TOKEN12: 'asdf';\n" + + "\n" + + "fragment A: 'A';", + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:4:0: One of the token TOKEN3 values unreachable. asdf is always overlapped by token TOKEN1\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:5:0: One of the token TOKEN4 values unreachable. qwer is always overlapped by token TOKEN1\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:7:0: One of the token TOKEN6 values unreachable. asdf is always overlapped by token TOKEN1\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:7:0: One of the token TOKEN6 values unreachable. asdf is always overlapped by token TOKEN3\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:9:0: One of the token TOKEN8 values unreachable. ab is always overlapped by token TOKEN8\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:11:0: One of the token TOKEN9 values unreachable. qwer is always overlapped by token TOKEN9\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:12:0: One of the token TOKEN10 values unreachable. \\r\\n is always overlapped by token TOKEN10\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:13:0: One of the token TOKEN11 values unreachable. \\r\\n is always overlapped by token TOKEN10\n" + + "warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Test.g4:13:0: One of the token TOKEN11 values unreachable. 
\\r\\n is always overlapped by token TOKEN10\n" + }; + + testErrors(test, false); + } } diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java index b19d6c8658..a8bb864741 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java @@ -269,11 +269,11 @@ public void AllErrorCodesDistinct() { "grammar A;\n" + "tokens{Foo}\n" + "b : Foo ;\n" + - "X : 'foo' -> popmode;\n" + // "meant" to use -> popMode - "Y : 'foo' -> token(Foo);", // "meant" to use -> type(Foo) + "X : 'foo1' -> popmode;\n" + // "meant" to use -> popMode + "Y : 'foo2' -> token(Foo);", // "meant" to use -> type(Foo) - "error(" + ErrorType.INVALID_LEXER_COMMAND.code + "): A.g4:4:13: lexer command popmode does not exist or is not supported by the current target\n" + - "error(" + ErrorType.INVALID_LEXER_COMMAND.code + "): A.g4:5:13: lexer command token does not exist or is not supported by the current target\n" + "error(" + ErrorType.INVALID_LEXER_COMMAND.code + "): A.g4:4:14: lexer command popmode does not exist or is not supported by the current target\n" + + "error(" + ErrorType.INVALID_LEXER_COMMAND.code + "): A.g4:5:14: lexer command token does not exist or is not supported by the current target\n" }; super.testErrors(pair, true); } @@ -283,11 +283,11 @@ public void AllErrorCodesDistinct() { "grammar A;\n" + "tokens{Foo}\n" + "b : Foo ;\n" + - "X : 'foo' -> popMode(Foo);\n" + // "meant" to use -> popMode - "Y : 'foo' -> type;", // "meant" to use -> type(Foo) + "X : 'foo1' -> popMode(Foo);\n" + // "meant" to use -> popMode + "Y : 'foo2' -> type;", // "meant" to use -> type(Foo) - "error(" + ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT.code + "): A.g4:4:13: lexer command popMode does not take any arguments\n" + - "error(" + ErrorType.MISSING_LEXER_COMMAND_ARGUMENT.code + "): A.g4:5:13: missing argument for lexer command type\n" + "error(" + ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT.code + "): A.g4:4:14: lexer command popMode does not take any arguments\n" + + "error(" + ErrorType.MISSING_LEXER_COMMAND_ARGUMENT.code + "): A.g4:5:14: missing argument for lexer command type\n" }; super.testErrors(pair, true); } diff --git a/tool/src/org/antlr/v4/semantics/SemanticPipeline.java b/tool/src/org/antlr/v4/semantics/SemanticPipeline.java index 17a11b08d4..dc3cb2fb00 100644 --- a/tool/src/org/antlr/v4/semantics/SemanticPipeline.java +++ b/tool/src/org/antlr/v4/semantics/SemanticPipeline.java @@ -108,6 +108,7 @@ public void process() { } symcheck.checkForModeConflicts(g); + symcheck.checkForUnreachableTokens(g); assignChannelTypes(g, collector.channelDefs); diff --git a/tool/src/org/antlr/v4/semantics/SymbolChecks.java b/tool/src/org/antlr/v4/semantics/SymbolChecks.java index fb972271da..abbf22de3e 100644 --- a/tool/src/org/antlr/v4/semantics/SymbolChecks.java +++ b/tool/src/org/antlr/v4/semantics/SymbolChecks.java @@ -7,7 +7,9 @@ package org.antlr.v4.semantics; import org.antlr.runtime.tree.CommonTree; +import org.antlr.runtime.tree.Tree; import org.antlr.v4.automata.LexerATNFactory; +import org.antlr.v4.parse.ANTLRLexer; import org.antlr.v4.parse.ANTLRParser; import org.antlr.v4.runtime.Token; import org.antlr.v4.tool.Alternative; @@ -23,6 +25,7 @@ import org.antlr.v4.tool.Rule; import org.antlr.v4.tool.ast.AltAST; import org.antlr.v4.tool.ast.GrammarAST; +import org.antlr.v4.tool.ast.TerminalAST; import java.util.ArrayList; import 
java.util.Collection; @@ -39,42 +42,31 @@ * Side-effect: strip away redef'd rules. */ public class SymbolChecks { - Grammar g; - SymbolCollector collector; - Map<String, Rule> nameToRuleMap = new HashMap<String, Rule>(); + Grammar g; + SymbolCollector collector; + Map<String, Rule> nameToRuleMap = new HashMap<String, Rule>(); Set<String> tokenIDs = new HashSet<String>(); - Map<String, Set<String>> actionScopeToActionNames = new HashMap<String, Set<String>>(); -// DoubleKeyMap<String, String, GrammarAST> namedActions = -// new DoubleKeyMap<String, String, GrammarAST>(); + Map<String, Set<String>> actionScopeToActionNames = new HashMap<String, Set<String>>(); public ErrorManager errMgr; protected final Set<String> reservedNames = new HashSet<String>(); + { reservedNames.addAll(LexerATNFactory.getCommonConstants()); } - public SymbolChecks(Grammar g, SymbolCollector collector) { - this.g = g; - this.collector = collector; + public SymbolChecks(Grammar g, SymbolCollector collector) { + this.g = g; + this.collector = collector; this.errMgr = g.tool.errMgr; - for (GrammarAST tokenId : collector.tokenIDRefs) { - tokenIDs.add(tokenId.getText()); - } - /* - System.out.println("rules="+collector.rules); - System.out.println("rulerefs="+collector.rulerefs); - System.out.println("tokenIDRefs="+collector.tokenIDRefs); - System.out.println("terminals="+collector.terminals); - System.out.println("strings="+collector.strings); - System.out.println("tokensDef="+collector.tokensDefs); - System.out.println("actions="+collector.actions); - System.out.println("scopes="+collector.scopes); - */ - } - - public void process() { + for (GrammarAST tokenId : collector.tokenIDRefs) { + tokenIDs.add(tokenId.getText()); + } + } + + public void process() { // methods affect fields, but no side-effects outside this object // So, call order sensitive // First collect all rules for later use in checkForLabelConflict() @@ -83,7 +75,6 @@ public void process() { } checkReservedNames(g.rules.values()); checkActionRedefinitions(collector.namedActions); - checkForTokenConflicts(collector.tokenIDRefs); // sets tokenIDs checkForLabelConflicts(g.rules.values()); } @@ -116,21 +107,14 @@ public void checkActionRedefinitions(List<GrammarAST> actions) { } } - public void checkForTokenConflicts(List<GrammarAST> tokenIDRefs) { -// for (GrammarAST a : tokenIDRefs) { -// Token t = a.token; -// String ID = t.getText(); -// tokenIDs.add(ID); -// } - } - - /** Make sure a label doesn't conflict with another symbol. - * Labels must not conflict with: rules, tokens, scope names, - * return values, parameters, and rule-scope dynamic attributes - * defined in surrounding rule. Also they must have same type - * for repeated defs. - */ - public void checkForLabelConflicts(Collection<Rule> rules) { + /** + * Make sure a label doesn't conflict with another symbol. + * Labels must not conflict with: rules, tokens, scope names, + * return values, parameters, and rule-scope dynamic attributes + * defined in surrounding rule. Also they must have same type + * for repeated defs. + */ + public void checkForLabelConflicts(Collection<Rule> rules) { for (Rule r : rules) { checkForAttributeConflicts(r); @@ -213,7 +197,7 @@ private void checkForTypeMismatch(Rule r, LabelElementPair prevLabelPair, LabelE // Such behavior is referring to the fact that the warning is typically reported on the actual label redefinition, // but for left-recursive rules the warning is reported on the enclosing rule. org.antlr.runtime.Token token = r instanceof LeftRecursiveRule - ? ((GrammarAST) r.ast.getChild(0)).getToken() + ?
((GrammarAST) r.ast.getChild(0)).getToken() : labelPair.label.token; errMgr.grammarError( ErrorType.LABEL_TYPE_CONFLICT, @@ -227,7 +211,7 @@ private void checkForTypeMismatch(Rule r, LabelElementPair prevLabelPair, LabelE (labelPair.type.equals(LabelType.RULE_LABEL) || labelPair.type.equals(LabelType.RULE_LIST_LABEL))) { org.antlr.runtime.Token token = r instanceof LeftRecursiveRule - ? ((GrammarAST) r.ast.getChild(0)).getToken() + ? ((GrammarAST) r.ast.getChild(0)).getToken() : labelPair.label.token; String prevLabelOp = prevLabelPair.type.equals(LabelType.RULE_LIST_LABEL) ? "+=" : "="; String labelOp = labelPair.type.equals(LabelType.RULE_LIST_LABEL) ? "+=" : "="; @@ -291,11 +275,11 @@ protected void checkDeclarationRuleConflicts(Rule r, AttributeDict attributes, S for (Attribute attribute : attributes.attributes.values()) { if (ruleNames.contains(attribute.name)) { errMgr.grammarError( - errorType, - g.fileName, - attribute.token != null ? attribute.token : ((GrammarAST)r.ast.getChild(0)).token, - attribute.name, - r.name); + errorType, + g.fileName, + attribute.token != null ? attribute.token : ((GrammarAST) r.ast.getChild(0)).token, + attribute.name, + r.name); } } } @@ -308,11 +292,11 @@ protected void checkLocalConflictingDeclarations(Rule r, AttributeDict attribute Set<String> conflictingKeys = attributes.intersection(referenceAttributes); for (String key : conflictingKeys) { errMgr.grammarError( - errorType, - g.fileName, - attributes.get(key).token != null ? attributes.get(key).token : ((GrammarAST) r.ast.getChild(0)).token, - key, - r.name); + errorType, + g.fileName, + attributes.get(key).token != null ? attributes.get(key).token : ((GrammarAST)r.ast.getChild(0)).token, + key, + r.name); } } @@ -341,8 +325,122 @@ public void checkForModeConflicts(Grammar g) { } } - // CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline) + /** + * Algorithm steps: + * 1. Collect all simple string literals (i.e. 'asdf', 'as' 'df', but not [a-z]+, 'a'..'z') + * for all lexer rules in each mode, except for autogenerated tokens ({@link #getSingleTokenValues(Rule) getSingleTokenValues}) + * 2. Compare every string literal with each other ({@link #checkForOverlap(Grammar, Rule, Rule, List, List) checkForOverlap}) + * and report a TOKEN_UNREACHABLE warning if the same string is found. + * Complexity: O(m * n^2 / 2), which is approximately O(n^2), + * where m is the number of modes and n is the average number of lexer rules per mode. + * See also the testUnreachableTokens unit test for details.
+ */ + public void checkForUnreachableTokens(Grammar g) { + if (g.isLexer()) { + LexerGrammar lexerGrammar = (LexerGrammar)g; + for (List<Rule> rules : lexerGrammar.modes.values()) { + // Collect string literal lexer rules for each mode + List<Rule> stringLiteralRules = new ArrayList<>(); + List<List<String>> stringLiteralValues = new ArrayList<>(); + for (int i = 0; i < rules.size(); i++) { + Rule rule = rules.get(i); + + List<String> ruleStringAlts = getSingleTokenValues(rule); + if (ruleStringAlts != null && ruleStringAlts.size() > 0) { + stringLiteralRules.add(rule); + stringLiteralValues.add(ruleStringAlts); + } + } + + // Check string sets intersection + for (int i = 0; i < stringLiteralRules.size(); i++) { + List<String> firstTokenStringValues = stringLiteralValues.get(i); + Rule rule1 = stringLiteralRules.get(i); + checkForOverlap(g, rule1, rule1, firstTokenStringValues, stringLiteralValues.get(i)); + + // Compare fragment rules only with themselves + if (!rule1.isFragment()) { + for (int j = i + 1; j < stringLiteralRules.size(); j++) { + Rule rule2 = stringLiteralRules.get(j); + if (!rule2.isFragment()) { + checkForOverlap(g, rule1, stringLiteralRules.get(j), firstTokenStringValues, stringLiteralValues.get(j)); + } + } + } + } + } + } + } + + /** + * Returns the list of simple string literal values for the given rule. + */ + private List<String> getSingleTokenValues(Rule rule) + { + List<String> values = new ArrayList<>(); + for (Alternative alt : rule.alt) { + if (alt != null) { + // select the first alt if the token has a lexer command + Tree rootNode = alt.ast.getChildCount() == 2 && + alt.ast.getChild(0) instanceof AltAST && alt.ast.getChild(1) instanceof GrammarAST + ? alt.ast.getChild(0) + : alt.ast; + + if (rootNode.getTokenStartIndex() == -1) { + continue; // ignore autogenerated tokens from combined grammars that start with T__ + } + + // Ignore the alt if it contains anything other than string literals (repetition, optional) + boolean ignore = false; + StringBuilder currentValue = new StringBuilder(); + for (int i = 0; i < rootNode.getChildCount(); i++) { + Tree child = rootNode.getChild(i); + if (!(child instanceof TerminalAST)) { + ignore = true; + break; + } + + TerminalAST terminalAST = (TerminalAST)child; + if (terminalAST.token.getType() != ANTLRLexer.STRING_LITERAL) { + ignore = true; + break; + } + else { + String text = terminalAST.token.getText(); + currentValue.append(text.substring(1, text.length() - 1)); + } + } + if (!ignore) { + values.add(currentValue.toString()); + } + } + } + return values; + } + + /** + * For the same rule, compare values starting from the next index: + * TOKEN_WITH_SAME_VALUES: 'asdf' | 'asdf'; + * For different rules, compare values starting from the first index: + * TOKEN1: 'asdf'; + * TOKEN2: 'asdf'; + */ + private void checkForOverlap(Grammar g, Rule rule1, Rule rule2, List<String> firstTokenStringValues, List<String> secondTokenStringValues) { + for (int i = 0; i < firstTokenStringValues.size(); i++) { + int secondTokenInd = rule1 == rule2 ?
i + 1 : 0; + String str1 = firstTokenStringValues.get(i); + for (int j = secondTokenInd; j < secondTokenStringValues.size(); j++) { + String str2 = secondTokenStringValues.get(j); + if (str1.equals(str2)) { + errMgr.grammarError(ErrorType.TOKEN_UNREACHABLE, g.fileName, + ((GrammarAST) rule2.ast.getChild(0)).token, rule2.name, str2, rule1.name); + } + } + } + } + + // CAN ONLY CALL THE TWO NEXT METHODS AFTER GRAMMAR HAS RULE DEFS (see semanticpipeline) public void checkRuleArgs(Grammar g, List<GrammarAST> rulerefs) { if ( rulerefs==null ) return; for (GrammarAST ref : rulerefs) { @@ -351,12 +449,12 @@ public void checkRuleArgs(Grammar g, List<GrammarAST> rulerefs) { GrammarAST arg = (GrammarAST)ref.getFirstChildWithType(ANTLRParser.ARG_ACTION); if ( arg!=null && (r==null || r.args==null) ) { errMgr.grammarError(ErrorType.RULE_HAS_NO_ARGS, - g.fileName, ref.token, ruleName); + g.fileName, ref.token, ruleName); } - else if ( arg==null && (r!=null&&r.args!=null) ) { + else if ( arg==null && (r!=null && r.args!=null) ) { errMgr.grammarError(ErrorType.MISSING_RULE_ARGS, - g.fileName, ref.token, ruleName); + g.fileName, ref.token, ruleName); } } } @@ -365,18 +463,18 @@ public void checkForQualifiedRuleIssues(Grammar g, List<GrammarAST> qualifiedRul for (GrammarAST dot : qualifiedRuleRefs) { GrammarAST grammar = (GrammarAST)dot.getChild(0); GrammarAST rule = (GrammarAST)dot.getChild(1); - g.tool.log("semantics", grammar.getText()+"."+rule.getText()); + g.tool.log("semantics", grammar.getText()+"."+rule.getText()); Grammar delegate = g.getImportedGrammar(grammar.getText()); if ( delegate==null ) { errMgr.grammarError(ErrorType.NO_SUCH_GRAMMAR_SCOPE, - g.fileName, grammar.token, grammar.getText(), - rule.getText()); + g.fileName, grammar.token, grammar.getText(), + rule.getText()); } else { if ( g.getRule(grammar.getText(), rule.getText())==null ) { errMgr.grammarError(ErrorType.NO_SUCH_RULE_IN_SCOPE, - g.fileName, rule.token, grammar.getText(), - rule.getText()); + g.fileName, rule.token, grammar.getText(), + rule.getText()); } } } diff --git a/tool/src/org/antlr/v4/tool/ErrorType.java b/tool/src/org/antlr/v4/tool/ErrorType.java index d7ad7ab40a..a0348035c3 100644 --- a/tool/src/org/antlr/v4/tool/ErrorType.java +++ b/tool/src/org/antlr/v4/tool/ErrorType.java @@ -1074,6 +1074,21 @@ public enum ErrorType { "unicode property escapes not allowed in lexer charset range: <arg>", ErrorSeverity.ERROR), + /**
+	 * Compiler Warning 184.
+	 *
+	 * <p>The token value is overlapped by another token or by itself.</p>
+	 *
+	 * <pre>
+	 * TOKEN1: 'value';
+	 * TOKEN2: 'value'; // warning
+	 * </pre>
+	 */
+	TOKEN_UNREACHABLE(
+			184,
+			"One of the token <arg> values unreachable. <arg2> is always overlapped by token <arg3>",
+			ErrorSeverity.WARNING),
+
 	/*
 	 * Backward incompatibility errors
 	 */
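
For reference, a minimal sketch of how the new warning surfaces through the existing testErrors(String[], boolean) helper used in TestSymbolIssues above. The grammar, test name, and expected line/column values below are illustrative only and are not part of the patch; the message text follows the TOKEN_UNREACHABLE template defined in ErrorType.java.

	// Hypothetical companion test, not included in the patch. It assumes the same
	// testErrors(String[], boolean) helper and ErrorType import as the tests above.
	@Test public void testUnreachableTokenMinimal() {
		String[] test = {
				"lexer grammar Min;\n" +
				"A: 'same';\n" +
				"B: 'same';\n" +     // 'same' can never be matched as B
				"mode OTHER;\n" +
				"C: 'same';",        // not reported: each mode is checked independently

				"warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Min.g4:3:0: " +
						"One of the token B values unreachable. same is always overlapped by token A\n"
		};

		testErrors(test, false);
	}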
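Two details of the check are easy to miss when reading the diff: alternatives made of several literals are concatenated before comparison (so 'as' 'df' collides with 'asdf'), and fragment rules are compared only against their own alternatives, never against other rules. That is also why the expected output of testUnreachableTokens above lists the TOKEN11 warning twice: TOKEN10 defines '\r\n' in two alternatives, and each one overlaps TOKEN11. A hedged sketch of those two behaviors, again using the illustrative testErrors pattern rather than anything added by the patch:

	// Hypothetical illustration, not part of the patch.
	@Test public void testUnreachableTokensConcatenationAndFragments() {
		String[] test = {
				"lexer grammar Frag;\n" +
				"W1: 'as' 'df';\n" +       // concatenated to "asdf" before comparison
				"W2: 'asdf';\n" +          // reported: overlapped by W1
				"fragment\n" +
				"F: 'qwer' | 'qwer';",     // reported: a fragment can only overlap itself

				"warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Frag.g4:3:0: " +
						"One of the token W2 values unreachable. asdf is always overlapped by token W1\n" +
				"warning(" + ErrorType.TOKEN_UNREACHABLE.code + "): Frag.g4:5:0: " +
						"One of the token F values unreachable. qwer is always overlapped by token F\n"
		};

		testErrors(test, false);
	}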