diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/LexerExec/EscapedCharacters.txt b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/LexerExec/EscapedCharacters.txt new file mode 100644 index 0000000000..b1189d6f1d --- /dev/null +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/LexerExec/EscapedCharacters.txt @@ -0,0 +1,16 @@ +[type] +Lexer + +[grammar] +lexer grammar L; +LF : '\\u000A'; +X : 'x'; + +[input] +"""x +""" + +[output] +[@0,0:0='x',<2>,1:0] +[@1,1:1='\n',<1>,1:1] +[@2,2:1='',<-1>,2:0] \ No newline at end of file diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/ParserExec/EOFInClosure.txt b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/ParserExec/EOFInClosure.txt deleted file mode 100644 index c592fbfe65..0000000000 --- a/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/ParserExec/EOFInClosure.txt +++ /dev/null @@ -1,19 +0,0 @@ -[notes] -This test ensures that {@link org.antlr.v4.runtime.atn.ParserATNSimulator} does not produce a -{@link StackOverflowError} when it encounters an {@code EOF} transition -inside a closure. - -[type] -Parser - -[grammar] -grammar T; -prog : stat EOF; -stat : 'x' ('y' | EOF)*?; - -[start] -prog - -[input] -x - diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/SemPredEvalParser/PredFromAltTestedInLoopBack_1.txt b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/SemPredEvalParser/PredFromAltTestedInLoopBack_1.txt new file mode 100644 index 0000000000..70ee58e5cd --- /dev/null +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/SemPredEvalParser/PredFromAltTestedInLoopBack_1.txt @@ -0,0 +1,44 @@ +[notes] +Loopback doesn't eval predicate at start of alt + +[type] +Parser + +[grammar] +grammar T; +file_ +@after {} + : para para EOF ; +para: paraContent NL NL ; +paraContent : ('s'|'x'|{})>}? 
NL)+ ; +NL : '\n' ; +s : 's' ; +X : 'x' ; + +[start] +file_ + +[input] +"""s + + +x +""" + +[output] +"""(file_ (para (paraContent s) \n \n) (para (paraContent \n x \n)) ) +""" + +[errors] +"""line 5:0 mismatched input '' expecting {'s', '\n', 'x'} +""" + +[skip] +Cpp +CSharp +Dart +Go +Node +PHP +Python2 +Python3 diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/BaseJavaTest.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/BaseJavaTest.java index f8e33bd6a3..c8f02775ba 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/BaseJavaTest.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/BaseJavaTest.java @@ -461,7 +461,7 @@ public void testActions(String templates, String actionName, String action, Stri AnalysisPipeline anal = new AnalysisPipeline(g); anal.process(); - CodeGenerator gen = new CodeGenerator(g); + CodeGenerator gen = CodeGenerator.create(g); ST outputFileST = gen.generateParser(false); String output = outputFileST.render(); //System.out.println(output); diff --git a/tool-testsuite/test/org/antlr/v4/misc/UtilsTest.java b/tool-testsuite/test/org/antlr/v4/misc/UtilsTest.java index f9c9b5d507..0c008224f8 100644 --- a/tool-testsuite/test/org/antlr/v4/misc/UtilsTest.java +++ b/tool-testsuite/test/org/antlr/v4/misc/UtilsTest.java @@ -1,6 +1,7 @@ package org.antlr.v4.misc; import org.antlr.runtime.Token; +import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.tool.ast.GrammarAST; import org.junit.Assert; import org.junit.Test; diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java index 6fa9674210..de16751680 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java @@ -130,7 +130,7 @@ public List getEvalInfoForString(String grammarString, String pattern) t if (g.isLexer()) factory = new 
LexerATNFactory((LexerGrammar) g); g.atn = factory.createATN(); - CodeGenerator gen = new CodeGenerator(g); + CodeGenerator gen = CodeGenerator.create(g); ST outputFileST = gen.generateParser(); // STViz viz = outputFileST.inspect(); diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java index 94a6532d16..22be571c88 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java @@ -345,6 +345,42 @@ public void AllErrorCodesDistinct() { super.testErrors(pair, true); } + // Test for https://github.com/antlr/antlr4/issues/2860, https://github.com/antlr/antlr4/issues/1105 + @Test public void testEpsilonClosureInLexer() { + String grammar = + "lexer grammar T;\n" + + "TOKEN: '\\'' FRAGMENT '\\'';\n" + + "fragment FRAGMENT: ('x'|)+;"; + + String expected = + "error(" + ErrorType.EPSILON_CLOSURE.code + "): T.g4:3:9: rule FRAGMENT contains a closure with at least one alternative that can match an empty string\n"; + + String[] pair = new String[] { + grammar, + expected + }; + + super.testErrors(pair, true); + } + + // Test for https://github.com/antlr/antlr4/issues/3359 + @Test public void testEofClosure() { + String grammar = + "lexer grammar EofClosure;\n" + + "EofClosure: 'x' EOF*;\n" + + "EofInAlternative: 'y' ('z' | EOF);"; + + String expected = + "error(" + ErrorType.EOF_CLOSURE.code + "): EofClosure.g4:2:0: rule EofClosure contains a closure with at least one alternative that can match EOF\n"; + + String[] pair = new String[] { + grammar, + expected + }; + + super.testErrors(pair, true); + } + // Test for https://github.com/antlr/antlr4/issues/1203 @Test public void testEpsilonOptionalAndClosureAnalysis() { String grammar = @@ -452,8 +488,8 @@ public void AllErrorCodesDistinct() { "lexer grammar A;\n" + "STRING : '\\\"' '\\\"' 'x' ;"; String expected = - "warning(" + 
ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:10: invalid escape sequence \\\"\n"+ - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:15: invalid escape sequence \\\"\n"; + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:10: invalid escape sequence \\\"\n"+ + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:15: invalid escape sequence \\\"\n"; String[] pair = new String[] { grammar, @@ -493,10 +529,9 @@ public void AllErrorCodesDistinct() { "lexer grammar A;\n" + "RULE : 'Foo \\uAABG \\x \\u';\n"; String expected = - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence \\uAABG\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence \\x\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n" + - "warning("+ErrorType.EPSILON_TOKEN.code+"): A.g4:2:0: non-fragment lexer rule RULE can match the empty string\n"; + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence \\uAABG\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence \\x\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n"; String[] pair = new String[] { grammar, @@ -536,7 +571,6 @@ public void AllErrorCodesDistinct() { @Test public void testInvalidCharSetsAndStringLiterals() { String grammar = "lexer grammar Test;\n" + - "INVALID_STRING_LITERAL: '\\\"' | '\\]' | '\\u24';\n" + "INVALID_STRING_LITERAL_RANGE: 'GH'..'LM';\n" + "INVALID_CHAR_SET: [\\u24\\uA2][\\{];\n" + //https://github.com/antlr/antlr4/issues/1077 "EMPTY_STRING_LITERAL_RANGE: 'F'..'A' | 'Z';\n" + @@ -549,21 +583,14 @@ public void AllErrorCodesDistinct() { "EMPTY_CHAR_SET_WITH_INVALID_ESCAPE_SEQUENCE: [\\'];"; // https://github.com/antlr/antlr4/issues/1556 String expected = - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): 
Test.g4:2:31: invalid escape sequence \\\"\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:38: invalid escape sequence \\]\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:45: invalid escape sequence \\u24\n" + - "error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:30: multi-character literals are not allowed in lexer sets: 'GH'\n" + - "error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:36: multi-character literals are not allowed in lexer sets: 'LM'\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence \\u24\\u\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:4:30: string literals and sets cannot be empty: []\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence \\{\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:4:40: string literals and sets cannot be empty: []\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:11:84: invalid escape sequence \\'\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:11:84: string literals and sets cannot be empty: []\n" + - "warning("+ ErrorType.EPSILON_TOKEN.code + "): Test.g4:2:0: non-fragment lexer rule INVALID_STRING_LITERAL can match the empty string\n"; + "error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:2:30: multi-character literals are not allowed in lexer sets: 'GH'\n" + + "error(" + 
ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:2:36: multi-character literals are not allowed in lexer sets: 'LM'\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:30: invalid escape sequence \\u24\\u\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:40: invalid escape sequence \\{\n" + + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:4:33: string literals and sets cannot be empty: 'F'..'A'\n" + + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:30: string literals and sets cannot be empty: 'f'..'a'\n" + + "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:36: string literals and sets cannot be empty: []\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:10:84: invalid escape sequence \\'\n"; String[] pair = new String[] { grammar, @@ -587,33 +614,23 @@ public void AllErrorCodesDistinct() { "UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}];\n" + "UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE_2: [\\p{Letter}-Z];\n" + "UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE_3: [A-\\p{Number}];\n" + - "INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n"; + "INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n" + + "EMOJI_MODIFIER: [\\p{Grapheme_Cluster_Break=E_Base}];\n"; String expected = - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:32: invalid escape sequence \\u{}\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:2:32: string literals and sets cannot be empty: []\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:41: invalid escape sequence \\u{\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:3:41: string literals and sets cannot be empty: []\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): 
Test.g4:4:35: invalid escape sequence \\u{110\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:4:35: string literals and sets cannot be empty: []\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:5:32: invalid escape sequence \\p{}\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:32: string literals and sets cannot be empty: []\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:41: invalid escape sequence \\p{\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:41: string literals and sets cannot be empty: []\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:41: invalid escape sequence \\P{}\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:7:41: string literals and sets cannot be empty: []\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:8:34: invalid escape sequence \\p{NotAProperty}\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:8:34: string literals and sets cannot be empty: []\n" + - "warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:9:43: invalid escape sequence \\P{NotAProperty}\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:9:43: string literals and sets cannot be empty: []\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:32: invalid escape sequence \\u{}\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:41: invalid escape sequence \\u{\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:35: invalid escape sequence \\u{110\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:5:32: invalid escape sequence \\p{}\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:41: invalid escape sequence \\p{\n" + + "error(" + 
ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:41: invalid escape sequence \\P{}\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:8:34: invalid escape sequence \\p{NotAProperty}\n" + + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:9:43: invalid escape sequence \\P{NotAProperty}\n" + "error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:10:39: unicode property escapes not allowed in lexer charset range: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}]\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:10:39: string literals and sets cannot be empty: []\n" + "error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:11:41: unicode property escapes not allowed in lexer charset range: [\\p{Letter}-Z]\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:11:41: string literals and sets cannot be empty: []\n" + "error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:12:41: unicode property escapes not allowed in lexer charset range: [A-\\p{Number}]\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:12:41: string literals and sets cannot be empty: []\n" + "error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:13:48: unicode property escapes not allowed in lexer charset range: [\\P{Uppercase_Letter}-\\P{Number}]\n" + - "error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:13:48: string literals and sets cannot be empty: []\n"; + "error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:14:16: invalid escape sequence \\p{Grapheme_Cluster_Break=E_Base}\n"; String[] pair = new String[] { grammar, @@ -839,4 +856,11 @@ public void AllErrorCodesDistinct() { super.testErrors(pair, true); } + + @Test public void testRuleNamesAsTree() { + String grammar = "" + + "grammar T;\n" + + "tree : 'X';"; + super.testErrors(new String[] { 
grammar, "" }, true); + } } diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java index 2792462568..66941b9b6f 100644 --- a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java +++ b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java @@ -7,7 +7,6 @@ package org.antlr.v4.test.tool; import org.antlr.v4.codegen.UnicodeEscapes; - import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -15,64 +14,53 @@ public class TestUnicodeEscapes { @Test public void latinJavaEscape() { - StringBuilder sb = new StringBuilder(); - UnicodeEscapes.appendJavaStyleEscapedCodePoint(0x0061, sb); - assertEquals("\\u0061", sb.toString()); + checkUnicodeEscape("\\u0061", 0x0061, "Java"); } @Test public void latinPythonEscape() { - StringBuilder sb = new StringBuilder(); - UnicodeEscapes.appendPythonStyleEscapedCodePoint(0x0061, sb); - assertEquals("\\u0061", sb.toString()); + checkUnicodeEscape("\\u0061", 0x0061, "Python2"); + checkUnicodeEscape("\\u0061", 0x0061, "Python3"); } @Test public void latinSwiftEscape() { - StringBuilder sb = new StringBuilder(); - UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0x0061, sb); - assertEquals("\\u{0061}", sb.toString()); + checkUnicodeEscape("\\u{0061}", 0x0061, "Swift"); } @Test public void bmpJavaEscape() { - StringBuilder sb = new StringBuilder(); - UnicodeEscapes.appendJavaStyleEscapedCodePoint(0xABCD, sb); - assertEquals("\\uABCD", sb.toString()); + checkUnicodeEscape("\\uABCD", 0xABCD, "Java"); } @Test public void bmpPythonEscape() { - StringBuilder sb = new StringBuilder(); - UnicodeEscapes.appendPythonStyleEscapedCodePoint(0xABCD, sb); - assertEquals("\\uABCD", sb.toString()); + checkUnicodeEscape("\\uABCD", 0xABCD, "Python2"); + checkUnicodeEscape("\\uABCD", 0xABCD, "Python3"); } @Test public void bmpSwiftEscape() { - StringBuilder sb = new StringBuilder(); - 
UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0xABCD, sb); - assertEquals("\\u{ABCD}", sb.toString()); + checkUnicodeEscape("\\u{ABCD}", 0xABCD, "Swift"); } @Test public void smpJavaEscape() { - StringBuilder sb = new StringBuilder(); - UnicodeEscapes.appendJavaStyleEscapedCodePoint(0x1F4A9, sb); - assertEquals("\\uD83D\\uDCA9", sb.toString()); + checkUnicodeEscape("\\uD83D\\uDCA9", 0x1F4A9, "Java"); } @Test public void smpPythonEscape() { - StringBuilder sb = new StringBuilder(); - UnicodeEscapes.appendPythonStyleEscapedCodePoint(0x1F4A9, sb); - assertEquals("\\U0001F4A9", sb.toString()); + checkUnicodeEscape("\\U0001F4A9", 0x1F4A9, "Python2"); + checkUnicodeEscape("\\U0001F4A9", 0x1F4A9, "Python3"); } @Test public void smpSwiftEscape() { - StringBuilder sb = new StringBuilder(); - UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0x1F4A9, sb); - assertEquals("\\u{1F4A9}", sb.toString()); + checkUnicodeEscape("\\u{1F4A9}", 0x1F4A9, "Swift"); + } + + private void checkUnicodeEscape(String expected, int input, String language) { + assertEquals(expected, UnicodeEscapes.escapeCodePoint(input, language)); } } diff --git a/tool/src/org/antlr/v4/Tool.java b/tool/src/org/antlr/v4/Tool.java index d2c95f525d..4b4ffd1c2c 100644 --- a/tool/src/org/antlr/v4/Tool.java +++ b/tool/src/org/antlr/v4/Tool.java @@ -24,7 +24,6 @@ import org.antlr.v4.parse.GrammarTreeVisitor; import org.antlr.v4.parse.ToolANTLRLexer; import org.antlr.v4.parse.ToolANTLRParser; -import org.antlr.v4.parse.v3TreeGrammarException; import org.antlr.v4.runtime.RuntimeMetaData; import org.antlr.v4.runtime.misc.LogManager; import org.antlr.v4.runtime.misc.IntegerList; @@ -381,17 +380,16 @@ public void processNonCombinedGrammar(Grammar g, boolean gencode) { SemanticPipeline sem = new SemanticPipeline(g); sem.process(); - String language = g.getOptionString("language"); - if ( !CodeGenerator.targetExists(language) ) { - errMgr.toolError(ErrorType.CANNOT_CREATE_TARGET_GENERATOR, language); + if ( 
errMgr.getNumErrors()>prevErrors ) return; + + CodeGenerator codeGenerator = CodeGenerator.create(g); + if (codeGenerator == null) { return; } - if ( errMgr.getNumErrors()>prevErrors ) return; - // BUILD ATN FROM AST ATNFactory factory; - if ( g.isLexer() ) factory = new LexerATNFactory((LexerGrammar)g); + if ( g.isLexer() ) factory = new LexerATNFactory((LexerGrammar)g, codeGenerator); else factory = new ParserATNFactory(g); g.atn = factory.createATN(); @@ -409,7 +407,7 @@ public void processNonCombinedGrammar(Grammar g, boolean gencode) { // GENERATE CODE if ( gencode ) { - CodeGenPipeline gen = new CodeGenPipeline(g); + CodeGenPipeline gen = new CodeGenPipeline(g, codeGenerator); gen.process(); } } @@ -657,20 +655,15 @@ public GrammarRootAST parse(String fileName, CharStream in) { lexer.tokens = tokens; ToolANTLRParser p = new ToolANTLRParser(tokens, this); p.setTreeAdaptor(adaptor); - try { - ParserRuleReturnScope r = p.grammarSpec(); - GrammarAST root = (GrammarAST)r.getTree(); - if ( root instanceof GrammarRootAST) { - ((GrammarRootAST)root).hasErrors = lexer.getNumberOfSyntaxErrors()>0 || p.getNumberOfSyntaxErrors()>0; - assert ((GrammarRootAST)root).tokenStream == tokens; - if ( grammarOptions!=null ) { - ((GrammarRootAST)root).cmdLineOptions = grammarOptions; - } - return ((GrammarRootAST)root); + ParserRuleReturnScope r = p.grammarSpec(); + GrammarAST root = (GrammarAST) r.getTree(); + if (root instanceof GrammarRootAST) { + ((GrammarRootAST) root).hasErrors = lexer.getNumberOfSyntaxErrors() > 0 || p.getNumberOfSyntaxErrors() > 0; + assert ((GrammarRootAST) root).tokenStream == tokens; + if (grammarOptions != null) { + ((GrammarRootAST) root).cmdLineOptions = grammarOptions; } - } - catch (v3TreeGrammarException e) { - errMgr.grammarError(ErrorType.V3_TREE_GRAMMAR, fileName, e.location); + return ((GrammarRootAST) root); } return null; } diff --git a/tool/src/org/antlr/v4/analysis/LeftRecursiveRuleAnalyzer.java 
b/tool/src/org/antlr/v4/analysis/LeftRecursiveRuleAnalyzer.java index ae8a441fe8..51abe78659 100644 --- a/tool/src/org/antlr/v4/analysis/LeftRecursiveRuleAnalyzer.java +++ b/tool/src/org/antlr/v4/analysis/LeftRecursiveRuleAnalyzer.java @@ -84,8 +84,7 @@ public void loadPrecRuleTemplates() { } // use codegen to get correct language templates; that's it though - CodeGenerator gen = new CodeGenerator(tool, null, language); - codegenTemplates = gen.getTemplates(); + codegenTemplates = CodeGenerator.create(tool, null, language).getTemplates(); } @Override diff --git a/tool/src/org/antlr/v4/analysis/LeftRecursiveRuleTransformer.java b/tool/src/org/antlr/v4/analysis/LeftRecursiveRuleTransformer.java index dd4dce6c7b..177c354c54 100644 --- a/tool/src/org/antlr/v4/analysis/LeftRecursiveRuleTransformer.java +++ b/tool/src/org/antlr/v4/analysis/LeftRecursiveRuleTransformer.java @@ -62,7 +62,7 @@ public LeftRecursiveRuleTransformer(GrammarRootAST ast, Collection rules, } public void translateLeftRecursiveRules() { - String language = g.getOptionString("language"); + String language = g.getLanguage(); // translate all recursive rules List leftRecursiveRuleNames = new ArrayList(); for (Rule r : rules) { diff --git a/tool/src/org/antlr/v4/automata/LexerATNFactory.java b/tool/src/org/antlr/v4/automata/LexerATNFactory.java index 5f39325d67..c3859a1f35 100644 --- a/tool/src/org/antlr/v4/automata/LexerATNFactory.java +++ b/tool/src/org/antlr/v4/automata/LexerATNFactory.java @@ -15,25 +15,7 @@ import org.antlr.v4.parse.ANTLRParser; import org.antlr.v4.runtime.IntStream; import org.antlr.v4.runtime.Lexer; -import org.antlr.v4.runtime.atn.ATN; -import org.antlr.v4.runtime.atn.ATNState; -import org.antlr.v4.runtime.atn.ActionTransition; -import org.antlr.v4.runtime.atn.AtomTransition; -import org.antlr.v4.runtime.atn.CodePointTransitions; -import org.antlr.v4.runtime.atn.LexerAction; -import org.antlr.v4.runtime.atn.LexerChannelAction; -import org.antlr.v4.runtime.atn.LexerCustomAction; 
-import org.antlr.v4.runtime.atn.LexerModeAction; -import org.antlr.v4.runtime.atn.LexerMoreAction; -import org.antlr.v4.runtime.atn.LexerPopModeAction; -import org.antlr.v4.runtime.atn.LexerPushModeAction; -import org.antlr.v4.runtime.atn.LexerSkipAction; -import org.antlr.v4.runtime.atn.LexerTypeAction; -import org.antlr.v4.runtime.atn.NotSetTransition; -import org.antlr.v4.runtime.atn.RuleStartState; -import org.antlr.v4.runtime.atn.SetTransition; -import org.antlr.v4.runtime.atn.TokensStartState; -import org.antlr.v4.runtime.atn.Transition; +import org.antlr.v4.runtime.atn.*; import org.antlr.v4.runtime.misc.Interval; import org.antlr.v4.runtime.misc.IntervalSet; import org.antlr.v4.tool.ErrorType; @@ -89,13 +71,15 @@ public class LexerATNFactory extends ParserATNFactory { protected Map actionToIndexMap = new HashMap(); public LexerATNFactory(LexerGrammar g) { + this(g, null); + } + + public LexerATNFactory(LexerGrammar g, CodeGenerator codeGenerator) { super(g); // use codegen to get correct language templates for lexer commands - String language = g.getOptionString("language"); String caseInsensitiveOption = g.getOptionString("caseInsensitive"); caseInsensitive = caseInsensitiveOption != null && caseInsensitiveOption.equals("true"); - CodeGenerator gen = new CodeGenerator(g.tool, null, language); - codegenTemplates = gen.getTemplates(); + codegenTemplates = (codeGenerator == null ? 
CodeGenerator.create(g) : codeGenerator).getTemplates(); } public static Set getCommonConstants() { @@ -142,6 +126,7 @@ public ATN createATN() { } ATNOptimizer.optimize(g, atn); + checkEpsilonClosure(); return atn; } @@ -380,10 +365,6 @@ public Handle charSetLiteral(GrammarAST charSetAST) { ATNState right = newState(charSetAST); IntervalSet set = getSetFromCharSetLiteral(charSetAST); - if (set.isNil()) { - g.tool.errMgr.grammarError(ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED, g.fileName, charSetAST.getToken(), "[]"); - } - left.addTransition(new SetTransition(right, set)); charSetAST.atnState = left; return new Handle(left, right); @@ -501,6 +482,11 @@ else if (c == '-' && !state.inRange && i != 0 && i != n - 1 && state.mode != Cha } // Whether or not we were in a range, we'll add the last code point found to the set. applyPrevState(charSetAST, set, state); + + if (set.isNil()) { + g.tool.errMgr.grammarError(ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED, g.fileName, charSetAST.getToken(), "[]"); + } + return set; } diff --git a/tool/src/org/antlr/v4/automata/ParserATNFactory.java b/tool/src/org/antlr/v4/automata/ParserATNFactory.java index 830b7fd33a..2d57dc93cb 100644 --- a/tool/src/org/antlr/v4/automata/ParserATNFactory.java +++ b/tool/src/org/antlr/v4/automata/ParserATNFactory.java @@ -110,17 +110,7 @@ public ATN createATN() { addRuleFollowLinks(); addEOFTransitionToStartRules(); ATNOptimizer.optimize(g, atn); - - for (Triple pair : preventEpsilonClosureBlocks) { - LL1Analyzer analyzer = new LL1Analyzer(atn); - ATNState blkStart = pair.b; - ATNState blkStop = pair.c; - IntervalSet lookahead = analyzer.LOOK(blkStart, blkStop, null); - if ( lookahead.contains(org.antlr.v4.runtime.Token.EPSILON)) { - ErrorType errorType = pair.a instanceof LeftRecursiveRule ? 
ErrorType.EPSILON_LR_FOLLOW : ErrorType.EPSILON_CLOSURE; - g.tool.errMgr.grammarError(errorType, g.fileName, ((GrammarAST)pair.a.ast.getChild(0)).getToken(), pair.a.name); - } - } + checkEpsilonClosure(); optionalCheck: for (Triple pair : preventEpsilonOptionalBlocks) { @@ -147,6 +137,22 @@ public ATN createATN() { return atn; } + protected void checkEpsilonClosure() { + for (Triple pair : preventEpsilonClosureBlocks) { + LL1Analyzer analyzer = new LL1Analyzer(atn); + ATNState blkStart = pair.b; + ATNState blkStop = pair.c; + IntervalSet lookahead = analyzer.LOOK(blkStart, blkStop, null); + if ( lookahead.contains(org.antlr.v4.runtime.Token.EPSILON)) { + ErrorType errorType = pair.a instanceof LeftRecursiveRule ? ErrorType.EPSILON_LR_FOLLOW : ErrorType.EPSILON_CLOSURE; + g.tool.errMgr.grammarError(errorType, g.fileName, ((GrammarAST)pair.a.ast.getChild(0)).getToken(), pair.a.name); + } + if ( lookahead.contains(org.antlr.v4.runtime.Token.EOF)) { + g.tool.errMgr.grammarError(ErrorType.EOF_CLOSURE, g.fileName, ((GrammarAST)pair.a.ast.getChild(0)).getToken(), pair.a.name); + } + } + } + protected void _createATN(Collection rules) { createRuleStartAndStopATNStates(); diff --git a/tool/src/org/antlr/v4/codegen/CodeGenPipeline.java b/tool/src/org/antlr/v4/codegen/CodeGenPipeline.java index 1f97ddd028..50acbd58f4 100644 --- a/tool/src/org/antlr/v4/codegen/CodeGenPipeline.java +++ b/tool/src/org/antlr/v4/codegen/CodeGenPipeline.java @@ -16,16 +16,15 @@ import java.util.List; public class CodeGenPipeline { - Grammar g; + final Grammar g; + final CodeGenerator gen; - public CodeGenPipeline(Grammar g) { + public CodeGenPipeline(Grammar g, CodeGenerator gen) { this.g = g; + this.gen = gen; } public void process() { - if ( !CodeGenerator.targetExists(g.getOptionString("language")) ) return; - - CodeGenerator gen = new CodeGenerator(g); IntervalSet idTypes = new IntervalSet(); idTypes.add(ANTLRParser.ID); idTypes.add(ANTLRParser.RULE_REF); diff --git 
a/tool/src/org/antlr/v4/codegen/CodeGenerator.java b/tool/src/org/antlr/v4/codegen/CodeGenerator.java index bc1080f3fa..e065466337 100644 --- a/tool/src/org/antlr/v4/codegen/CodeGenerator.java +++ b/tool/src/org/antlr/v4/codegen/CodeGenerator.java @@ -27,12 +27,10 @@ public class CodeGenerator { public static final String TEMPLATE_ROOT = "org/antlr/v4/tool/templates/codegen"; public static final String VOCAB_FILE_EXTENSION = ".tokens"; - public static final String DEFAULT_LANGUAGE = "Java"; public static final String vocabFilePattern = "=\n}>" + "=\n}>"; - public final Grammar g; public final Tool tool; @@ -43,62 +41,37 @@ public class CodeGenerator { public int lineWidth = 72; - private CodeGenerator(String language) { - this.g = null; - this.tool = null; - this.language = language; - } - - public CodeGenerator(Grammar g) { - this(g.tool, g, g.getOptionString("language")); - } - - public CodeGenerator(Tool tool, Grammar g, String language) { - this.g = g; - this.tool = tool; - this.language = language != null ? 
language : DEFAULT_LANGUAGE; + public static CodeGenerator create(Grammar g) { + return create(g.tool, g, g.getLanguage()); } - public static boolean targetExists(String language) { + public static CodeGenerator create(Tool tool, Grammar g, String language) { String targetName = "org.antlr.v4.codegen.target."+language+"Target"; try { Class c = Class.forName(targetName).asSubclass(Target.class); Constructor ctor = c.getConstructor(CodeGenerator.class); - CodeGenerator gen = new CodeGenerator(language); - Target target = ctor.newInstance(gen); - return target.templatesExist(); + CodeGenerator codeGenerator = new CodeGenerator(tool, g, language); + codeGenerator.target = ctor.newInstance(codeGenerator); + return codeGenerator; } - catch (Exception e) { // ignore errors; we're detecting presence only + catch (Exception e) { + g.tool.errMgr.toolError(ErrorType.CANNOT_CREATE_TARGET_GENERATOR, e, language); + return null; } - return false; } + private CodeGenerator(Tool tool, Grammar g, String language) { + this.g = g; + this.tool = tool; + this.language = language; + } public Target getTarget() { - if ( target == null && targetExists(language) ) { - loadLanguageTarget(language); - } return target; } - public STGroup getTemplates() { - Target t = getTarget(); - return t==null ? 
null : t.getTemplates(); - } - - protected void loadLanguageTarget(String language) { - String targetName = "org.antlr.v4.codegen.target."+language+"Target"; - try { - Class c = Class.forName(targetName).asSubclass(Target.class); - Constructor ctor = c.getConstructor(CodeGenerator.class); - target = ctor.newInstance(this); - } - catch (Exception e) { - tool.errMgr.toolError(ErrorType.CANNOT_CREATE_TARGET_GENERATOR, - e, - targetName); - } + return target.getTemplates(); } // CREATE TEMPLATES BY WALKING MODEL @@ -166,23 +139,23 @@ ST getTokenVocabOutput() { } public void writeRecognizer(ST outputFileST, boolean header) { - getTarget().genFile(g, outputFileST, getRecognizerFileName(header)); + target.genFile(g, outputFileST, getRecognizerFileName(header)); } public void writeListener(ST outputFileST, boolean header) { - getTarget().genFile(g, outputFileST, getListenerFileName(header)); + target.genFile(g, outputFileST, getListenerFileName(header)); } public void writeBaseListener(ST outputFileST, boolean header) { - getTarget().genFile(g, outputFileST, getBaseListenerFileName(header)); + target.genFile(g, outputFileST, getBaseListenerFileName(header)); } public void writeVisitor(ST outputFileST, boolean header) { - getTarget().genFile(g, outputFileST, getVisitorFileName(header)); + target.genFile(g, outputFileST, getVisitorFileName(header)); } public void writeBaseVisitor(ST outputFileST, boolean header) { - getTarget().genFile(g, outputFileST, getBaseVisitorFileName(header)); + target.genFile(g, outputFileST, getBaseVisitorFileName(header)); } public void writeVocabFile() { @@ -191,7 +164,7 @@ public void writeVocabFile() { ST tokenVocabSerialization = getTokenVocabOutput(); String fileName = getVocabFileName(); if ( fileName!=null ) { - getTarget().genFile(g, tokenVocabSerialization, fileName); + target.genFile(g, tokenVocabSerialization, fileName); } } @@ -218,11 +191,11 @@ public void write(ST code, String fileName) { public String getBaseListenerFileName() { 
return getBaseListenerFileName(false); } public String getBaseVisitorFileName() { return getBaseVisitorFileName(false); } - public String getRecognizerFileName(boolean header) { return getTarget().getRecognizerFileName(header); } - public String getListenerFileName(boolean header) { return getTarget().getListenerFileName(header); } - public String getVisitorFileName(boolean header) { return getTarget().getVisitorFileName(header); } - public String getBaseListenerFileName(boolean header) { return getTarget().getBaseListenerFileName(header); } - public String getBaseVisitorFileName(boolean header) { return getTarget().getBaseVisitorFileName(header); } + public String getRecognizerFileName(boolean header) { return target.getRecognizerFileName(header); } + public String getListenerFileName(boolean header) { return target.getListenerFileName(header); } + public String getVisitorFileName(boolean header) { return target.getVisitorFileName(header); } + public String getBaseListenerFileName(boolean header) { return target.getBaseListenerFileName(header); } + public String getBaseVisitorFileName(boolean header) { return target.getBaseVisitorFileName(header); } /** What is the name of the vocab file generated for this grammar? * Returns null if no .tokens file should be generated. 
diff --git a/tool/src/org/antlr/v4/codegen/Target.java b/tool/src/org/antlr/v4/codegen/Target.java index ec6d08f9ae..c51512a907 100644 --- a/tool/src/org/antlr/v4/codegen/Target.java +++ b/tool/src/org/antlr/v4/codegen/Target.java @@ -26,8 +26,6 @@ import org.stringtemplate.v4.StringRenderer; import org.stringtemplate.v4.misc.STMessage; -import java.net.URL; - /** */ public abstract class Target { /** For pure strings of Java 16-bit Unicode char, how can we display @@ -44,10 +42,9 @@ public abstract class Target { protected String[] targetCharValueEscape = new String[255]; protected final CodeGenerator gen; - private final String language; private STGroup templates; - protected Target(CodeGenerator gen, String language) { + protected Target(CodeGenerator gen) { targetCharValueEscape['\n'] = "\\n"; targetCharValueEscape['\r'] = "\\r"; targetCharValueEscape['\t'] = "\\t"; @@ -57,17 +54,14 @@ protected Target(CodeGenerator gen, String language) { targetCharValueEscape['\''] = "\\'"; targetCharValueEscape['"'] = "\\\""; this.gen = gen; - this.language = language; } + public String getLanguage() { return gen.language; } + public CodeGenerator getCodeGenerator() { return gen; } - public String getLanguage() { - return language; - } - /** ANTLR tool should check output templates / target are compatible with tool code generation. * For now, a simple string match used on x.y of x.y.z scheme. We use a method to avoid mismatches * between a template called VERSION. This value is checked against Tool.VERSION during load of templates. 
@@ -76,7 +70,9 @@ public String getLanguage() { * * @since 4.3 */ - public abstract String getVersion(); + public String getVersion() { + return Tool.VERSION; + } public STGroup getTemplates() { if (templates == null) { @@ -84,7 +80,7 @@ public STGroup getTemplates() { if ( version==null || !RuntimeMetaData.getMajorMinorVersion(version).equals(RuntimeMetaData.getMajorMinorVersion(Tool.VERSION))) { - gen.tool.errMgr.toolError(ErrorType.INCOMPATIBLE_TOOL_AND_TEMPLATES, version, Tool.VERSION, language); + gen.tool.errMgr.toolError(ErrorType.INCOMPATIBLE_TOOL_AND_TEMPLATES, version, Tool.VERSION, getLanguage()); } templates = loadTemplates(); } @@ -172,16 +168,32 @@ else if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(c)) { return buf.toString(); } + private void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb, boolean escape) { + if (escape) { + sb.append("\\"); + } + appendUnicodeEscapedCodePoint(codePoint, sb); + } + /** * Escape the Unicode code point appropriately for this language * and append the escaped value to {@code sb}. + * It exists for flexibility and backward compatibility with external targets + * The static method {@link UnicodeEscapes#appendEscapedCodePoint(StringBuilder, int, String)} can be used as well + * if default escaping method (Java) is used or language is officially supported */ - abstract protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb); + protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) { + UnicodeEscapes.appendEscapedCodePoint(sb, codePoint, getLanguage()); + } public String getTargetStringLiteralFromString(String s) { return getTargetStringLiteralFromString(s, true); } + public String getTargetStringLiteralFromANTLRStringLiteral(CodeGenerator generator, String literal, boolean addQuotes) { + return getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes, false); + } + /** *

Convert from an ANTLR string literal found in a grammar file to an * equivalent string literal in the target language. @@ -201,15 +213,15 @@ public String getTargetStringLiteralFromString(String s) { public String getTargetStringLiteralFromANTLRStringLiteral( CodeGenerator generator, String literal, - boolean addQuotes) + boolean addQuotes, + boolean escapeSpecial) { StringBuilder sb = new StringBuilder(); - String is = literal; if ( addQuotes ) sb.append('"'); - for (int i = 1; i < is.length() -1; ) { - int codePoint = is.codePointAt(i); + for (int i = 1; i < literal.length() -1; ) { + int codePoint = literal.codePointAt(i); int toAdvance = Character.charCount(codePoint); if (codePoint == '\\') { // Anything escaped is what it is! We assume that @@ -218,7 +230,7 @@ public String getTargetStringLiteralFromANTLRStringLiteral( // is what the default implementation is dealing with and remove // the escape. The C target does this for instance. // - int escapedCodePoint = is.codePointAt(i+toAdvance); + int escapedCodePoint = literal.codePointAt(i+toAdvance); toAdvance++; switch (escapedCodePoint) { // Pass through any escapes that Java also needs @@ -230,13 +242,16 @@ public String getTargetStringLiteralFromANTLRStringLiteral( case 'f': case '\\': // Pass the escape through + if (escapeSpecial && escapedCodePoint != '\\') { + sb.append('\\'); + } sb.append('\\'); sb.appendCodePoint(escapedCodePoint); break; case 'u': // Either unnnn or u{nnnnnn} - if (is.charAt(i+toAdvance) == '{') { - while (is.charAt(i+toAdvance) != '}') { + if (literal.charAt(i+toAdvance) == '{') { + while (literal.charAt(i+toAdvance) != '}') { toAdvance++; } toAdvance++; @@ -244,16 +259,17 @@ public String getTargetStringLiteralFromANTLRStringLiteral( else { toAdvance += 4; } - if ( i+toAdvance <= is.length() ) { // we might have an invalid \\uAB or something - String fullEscape = is.substring(i, i+toAdvance); + if ( i+toAdvance <= literal.length() ) { // we might have an invalid \\uAB or something 
+ String fullEscape = literal.substring(i, i+toAdvance); appendUnicodeEscapedCodePoint( CharSupport.getCharValueFromCharInGrammarLiteral(fullEscape), - sb); + sb, + escapeSpecial); } break; default: if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(escapedCodePoint)) { - appendUnicodeEscapedCodePoint(escapedCodePoint, sb); + appendUnicodeEscapedCodePoint(escapedCodePoint, sb, escapeSpecial); } else { sb.appendCodePoint(escapedCodePoint); @@ -268,7 +284,7 @@ public String getTargetStringLiteralFromANTLRStringLiteral( sb.append("\\\""); } else if (shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(codePoint)) { - appendUnicodeEscapedCodePoint(codePoint, sb); + appendUnicodeEscapedCodePoint(codePoint, sb, escapeSpecial); } else { sb.appendCodePoint(codePoint); @@ -505,30 +521,14 @@ public boolean grammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) { protected abstract boolean visibleGrammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode); public boolean templatesExist() { - String groupFileName = CodeGenerator.TEMPLATE_ROOT + "/" + getLanguage() + "/" + getLanguage() + STGroup.GROUP_FILE_EXTENSION; - STGroup result = null; - try { - result = new STGroupFile(groupFileName); - } - catch (IllegalArgumentException iae) { - result = null; - } - return result!=null; + return loadTemplatesHelper(false) != null; } - protected STGroup loadTemplates() { - String groupFileName = CodeGenerator.TEMPLATE_ROOT + "/" + getLanguage() + "/" + getLanguage() + STGroup.GROUP_FILE_EXTENSION; - STGroup result = null; - try { - result = new STGroupFile(groupFileName); - } - catch (IllegalArgumentException iae) { - gen.tool.errMgr.toolError(ErrorType.MISSING_CODE_GEN_TEMPLATES, - iae, - language); + STGroup result = loadTemplatesHelper(true); + if (result == null) { + return null; } - if ( result==null ) return null; result.registerRenderer(Integer.class, new NumberRenderer()); result.registerRenderer(String.class, new StringRenderer()); result.setListener(new 
STErrorListener() { @@ -560,6 +560,20 @@ private void reportError(STMessage msg) { return result; } + private STGroup loadTemplatesHelper(boolean reportErrorIfFail) { + String language = getLanguage(); + String groupFileName = CodeGenerator.TEMPLATE_ROOT + "/" + language + "/" + language + STGroup.GROUP_FILE_EXTENSION; + try { + return new STGroupFile(groupFileName); + } + catch (IllegalArgumentException iae) { + if (reportErrorIfFail) { + gen.tool.errMgr.toolError(ErrorType.MISSING_CODE_GEN_TEMPLATES, iae, getLanguage()); + } + return null; + } + } + /** * @since 4.3 */ diff --git a/tool/src/org/antlr/v4/codegen/UnicodeEscapes.java b/tool/src/org/antlr/v4/codegen/UnicodeEscapes.java index 8c88b4aeef..06ece5d552 100644 --- a/tool/src/org/antlr/v4/codegen/UnicodeEscapes.java +++ b/tool/src/org/antlr/v4/codegen/UnicodeEscapes.java @@ -8,31 +8,43 @@ /** * Utility class to escape Unicode code points using various - * languages' syntaxes. + * languages' syntax. */ -public abstract class UnicodeEscapes { - static public void appendJavaStyleEscapedCodePoint(int codePoint, StringBuilder sb) { - if (Character.isSupplementaryCodePoint(codePoint)) { - // char is not an 'integral' type, so we have to explicitly convert - // to int before passing to the %X formatter or else it throws. 
- sb.append(String.format("\\u%04X", (int)Character.highSurrogate(codePoint))); - sb.append(String.format("\\u%04X", (int)Character.lowSurrogate(codePoint))); - } - else { - sb.append(String.format("\\u%04X", codePoint)); - } +public class UnicodeEscapes { + public static String escapeCodePoint(int codePoint, String language) { + StringBuilder result = new StringBuilder(); + appendEscapedCodePoint(result, codePoint, language); + return result.toString(); } - static public void appendPythonStyleEscapedCodePoint(int codePoint, StringBuilder sb) { - if (Character.isSupplementaryCodePoint(codePoint)) { - sb.append(String.format("\\U%08X", codePoint)); - } - else { - sb.append(String.format("\\u%04X", codePoint)); + public static void appendEscapedCodePoint(StringBuilder sb, int codePoint, String language) { + switch (language) { + case "CSharp": + case "Python2": + case "Python3": + case "Cpp": + case "Go": + case "PHP": + String format = Character.isSupplementaryCodePoint(codePoint) ? "\\U%08X" : "\\u%04X"; + sb.append(String.format(format, codePoint)); + break; + case "Swift": + sb.append(String.format("\\u{%04X}", codePoint)); + break; + case "Java": + case "JavaScript": + case "Dart": + default: + if (Character.isSupplementaryCodePoint(codePoint)) { + // char is not an 'integral' type, so we have to explicitly convert + // to int before passing to the %X formatter or else it throws. 
+ sb.append(String.format("\\u%04X", (int)Character.highSurrogate(codePoint))); + sb.append(String.format("\\u%04X", (int)Character.lowSurrogate(codePoint))); + } + else { + sb.append(String.format("\\u%04X", codePoint)); + } + break; } } - - static public void appendSwiftStyleEscapedCodePoint(int codePoint, StringBuilder sb) { - sb.append(String.format("\\u{%04X}", codePoint)); - } } diff --git a/tool/src/org/antlr/v4/codegen/model/Recognizer.java b/tool/src/org/antlr/v4/codegen/model/Recognizer.java index 5d793904e3..5b63bed4b2 100644 --- a/tool/src/org/antlr/v4/codegen/model/Recognizer.java +++ b/tool/src/org/antlr/v4/codegen/model/Recognizer.java @@ -101,9 +101,8 @@ protected static String translateTokenStringToTarget(String tokenName, CodeGener } if (tokenName.charAt(0) == '\'') { - boolean addQuotes = false; String targetString = - gen.getTarget().getTargetStringLiteralFromANTLRStringLiteral(gen, tokenName, addQuotes); + gen.getTarget().getTargetStringLiteralFromANTLRStringLiteral(gen, tokenName, false, true); return "\"'" + targetString + "'\""; } else { diff --git a/tool/src/org/antlr/v4/codegen/model/SemPred.java b/tool/src/org/antlr/v4/codegen/model/SemPred.java index 3496b44a0b..00315d0c7f 100644 --- a/tool/src/org/antlr/v4/codegen/model/SemPred.java +++ b/tool/src/org/antlr/v4/codegen/model/SemPred.java @@ -67,6 +67,7 @@ public SemPred(OutputModelFactory factory, ActionAST ast) { else { msg = gen.getTarget().getTargetStringLiteralFromANTLRStringLiteral(gen, failNode.getText(), + true, true); } } diff --git a/tool/src/org/antlr/v4/codegen/target/CSharpTarget.java b/tool/src/org/antlr/v4/codegen/target/CSharpTarget.java index 5ddebd5b52..227949c6d2 100644 --- a/tool/src/org/antlr/v4/codegen/target/CSharpTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/CSharpTarget.java @@ -7,7 +7,6 @@ import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.Target; -import org.antlr.v4.codegen.UnicodeEscapes; import org.antlr.v4.tool.ErrorType; import 
org.antlr.v4.tool.ast.GrammarAST; import org.stringtemplate.v4.NumberRenderer; @@ -18,19 +17,13 @@ import org.stringtemplate.v4.misc.STMessage; public class CSharpTarget extends Target { - public CSharpTarget(CodeGenerator gen) { - super(gen, "CSharp"); + super(gen); targetCharValueEscape[0] = "\\0"; targetCharValueEscape[0x0007] = "\\a"; targetCharValueEscape[0x000B] = "\\v"; } - @Override - public String getVersion() { - return "4.9.3"; - } - @Override public String encodeIntAsCharEscape(int v) { if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) { @@ -59,7 +52,7 @@ protected boolean visibleGrammarSymbolCausesIssueInGeneratedCode(GrammarAST idNo @Override protected STGroup loadTemplates() { // override the superclass behavior to put all C# templates in the same folder - STGroup result = new STGroupFile(CodeGenerator.TEMPLATE_ROOT+"/CSharp/"+getLanguage()+STGroup.GROUP_FILE_EXTENSION); + STGroup result = new STGroupFile(CodeGenerator.TEMPLATE_ROOT+"/CSharp/"+ getLanguage()+STGroup.GROUP_FILE_EXTENSION); result.registerRenderer(Integer.class, new NumberRenderer()); result.registerRenderer(String.class, new StringRenderer()); result.setListener(new STErrorListener() { @@ -90,10 +83,4 @@ private void reportError(STMessage msg) { return result; } - - @Override - protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) { - // C# and Python share the same escaping style. 
- UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb); - } } diff --git a/tool/src/org/antlr/v4/codegen/target/CppTarget.java b/tool/src/org/antlr/v4/codegen/target/CppTarget.java index f52baf0702..41f5718210 100644 --- a/tool/src/org/antlr/v4/codegen/target/CppTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/CppTarget.java @@ -8,7 +8,6 @@ import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.Target; -import org.antlr.v4.codegen.UnicodeEscapes; import org.antlr.v4.tool.ErrorType; import org.antlr.v4.tool.ast.GrammarAST; import org.stringtemplate.v4.NumberRenderer; @@ -23,7 +22,6 @@ import java.util.Set; public class CppTarget extends Target { - protected static final String[] cppKeywords = { "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break", "case", "catch", "char", "char16_t", @@ -46,14 +44,10 @@ public class CppTarget extends Target { protected final Set badWords = new HashSet(); public CppTarget(CodeGenerator gen) { - super(gen, "Cpp"); + super(gen); targetCharValueEscape['?'] = "\\?"; } - public String getVersion() { - return "4.9.3"; - } - public boolean needsHeader() { return true; } public Set getBadWords() { @@ -171,10 +165,4 @@ private void reportError(STMessage msg) { return result; } - - @Override - protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) { - // C99 and Python share the same escaping style. 
- UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb); - } } diff --git a/tool/src/org/antlr/v4/codegen/target/DartTarget.java b/tool/src/org/antlr/v4/codegen/target/DartTarget.java index 281aba286c..4945c31b88 100644 --- a/tool/src/org/antlr/v4/codegen/target/DartTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/DartTarget.java @@ -6,10 +6,8 @@ package org.antlr.v4.codegen.target; -import org.antlr.v4.Tool; import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.Target; -import org.antlr.v4.codegen.UnicodeEscapes; import org.antlr.v4.tool.ast.GrammarAST; import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.StringRenderer; @@ -19,7 +17,6 @@ import java.util.Set; public class DartTarget extends Target { - /** * The Java target can cache the code generation templates. */ @@ -47,19 +44,15 @@ public class DartTarget extends Target { protected final Set badWords = new HashSet(); public DartTarget(CodeGenerator gen) { - super(gen, "Dart"); + super(gen); targetCharValueEscape['$'] = "\\$"; } @Override - public String getTargetStringLiteralFromANTLRStringLiteral(CodeGenerator generator, String literal, boolean addQuotes) { - return super.getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes).replace("$", "\\$"); - } - - @Override - public String getVersion() { - return Tool.VERSION; // Java and tool versions move in lock step + public String getTargetStringLiteralFromANTLRStringLiteral(CodeGenerator generator, String literal, boolean addQuotes, + boolean escapeSpecial) { + return super.getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes, escapeSpecial).replace("$", "\\$"); } public Set getBadWords() { @@ -104,9 +97,4 @@ public String encodeIntAsCharEscape(int v) { return String.format("\\u{%X}", v & 0xFFFF); } - - @Override - protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) { - UnicodeEscapes.appendJavaStyleEscapedCodePoint(codePoint, sb); - } } diff --git 
a/tool/src/org/antlr/v4/codegen/target/GoTarget.java b/tool/src/org/antlr/v4/codegen/target/GoTarget.java index cf9bc47d21..30e255da37 100644 --- a/tool/src/org/antlr/v4/codegen/target/GoTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/GoTarget.java @@ -8,7 +8,6 @@ import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.Target; -import org.antlr.v4.codegen.UnicodeEscapes; import org.antlr.v4.parse.ANTLRParser; import org.antlr.v4.tool.Grammar; import org.antlr.v4.tool.ast.GrammarAST; @@ -30,7 +29,6 @@ * * */ public class GoTarget extends Target { - private static final String[] goKeywords = { "break", "default", "func", "interface", "select", "case", "defer", "go", "map", "struct", @@ -66,12 +64,7 @@ public class GoTarget extends Target { && !Boolean.parseBoolean(System.getProperty("antlr.go.disable-gofmt")); public GoTarget(CodeGenerator gen) { - super(gen, "Go"); - } - - @Override - public String getVersion() { - return "4.9.3"; + super(gen); } public Set getBadWords() { @@ -230,10 +223,4 @@ public String toString(Object o, String formatString, Locale locale) { } } - - @Override - protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) { - // Go and Python share the same escaping style. 
- UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb); - } } diff --git a/tool/src/org/antlr/v4/codegen/target/JavaScriptTarget.java b/tool/src/org/antlr/v4/codegen/target/JavaScriptTarget.java index a30d3f44b4..9254e21dba 100644 --- a/tool/src/org/antlr/v4/codegen/target/JavaScriptTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/JavaScriptTarget.java @@ -8,7 +8,6 @@ import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.Target; -import org.antlr.v4.codegen.UnicodeEscapes; import org.antlr.v4.tool.ast.GrammarAST; import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.StringRenderer; @@ -23,7 +22,6 @@ * @author Eric Vergnaud */ public class JavaScriptTarget extends Target { - /** Source: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar */ protected static final String[] javaScriptKeywords = { "break", "case", "class", "catch", "const", "continue", "debugger", @@ -46,14 +44,9 @@ public class JavaScriptTarget extends Target { protected final Set badWords = new HashSet(); public JavaScriptTarget(CodeGenerator gen) { - super(gen, "JavaScript"); + super(gen); } - @Override - public String getVersion() { - return "4.9.3"; - } - public Set getBadWords() { if (badWords.isEmpty()) { addBadWords(); @@ -139,10 +132,4 @@ public boolean wantsBaseVisitor() { public boolean supportsOverloadedMethods() { return false; } - - @Override - protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) { - // JavaScript and Java share the same escaping style. 
- UnicodeEscapes.appendJavaStyleEscapedCodePoint(codePoint, sb); - } } diff --git a/tool/src/org/antlr/v4/codegen/target/JavaTarget.java b/tool/src/org/antlr/v4/codegen/target/JavaTarget.java index 2482fef70e..2ef48fe568 100644 --- a/tool/src/org/antlr/v4/codegen/target/JavaTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/JavaTarget.java @@ -9,7 +9,6 @@ import org.antlr.v4.Tool; import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.Target; -import org.antlr.v4.codegen.UnicodeEscapes; import org.antlr.v4.tool.ast.GrammarAST; import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.StringRenderer; @@ -20,7 +19,6 @@ import java.util.Set; public class JavaTarget extends Target { - /** * The Java target can cache the code generation templates. */ @@ -41,10 +39,10 @@ public class JavaTarget extends Target { protected final Set badWords = new HashSet(); public JavaTarget(CodeGenerator gen) { - super(gen, "Java"); + super(gen); } - @Override + @Override public String getVersion() { return Tool.VERSION; // Java and tool versions move in lock step } @@ -98,11 +96,5 @@ public String toString(Object o, String formatString, Locale locale) { return super.toString(o, formatString, locale); } - - } - - @Override - protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) { - UnicodeEscapes.appendJavaStyleEscapedCodePoint(codePoint, sb); } } diff --git a/tool/src/org/antlr/v4/codegen/target/PHPTarget.java b/tool/src/org/antlr/v4/codegen/target/PHPTarget.java index 9072558f21..16b09c9979 100644 --- a/tool/src/org/antlr/v4/codegen/target/PHPTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/PHPTarget.java @@ -8,7 +8,6 @@ import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.Target; -import org.antlr.v4.codegen.UnicodeEscapes; import org.antlr.v4.tool.ast.GrammarAST; import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.StringRenderer; @@ -47,16 +46,10 @@ public class PHPTarget extends Target { 
private final Set badWords = new HashSet(); public PHPTarget(CodeGenerator gen) { - super(gen, "PHP"); - + super(gen); targetCharValueEscape['$'] = "\\$"; } - @Override - public String getVersion() { - return "4.9.3"; - } - @Override public String encodeIntAsCharEscape(int v) { if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) { @@ -98,16 +91,11 @@ public boolean supportsOverloadedMethods() { return false; } - @Override - protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) { - UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb); - } - @Override - public String getTargetStringLiteralFromANTLRStringLiteral(CodeGenerator generator, String literal, boolean addQuotes) { - String targetStringLiteral = super.getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes); + public String getTargetStringLiteralFromANTLRStringLiteral(CodeGenerator generator, String literal, boolean addQuotes, + boolean escapeSpecial) { + String targetStringLiteral = super.getTargetStringLiteralFromANTLRStringLiteral(generator, literal, addQuotes, escapeSpecial); targetStringLiteral = targetStringLiteral.replace("$", "\\$"); - return targetStringLiteral; } } diff --git a/tool/src/org/antlr/v4/codegen/target/Python2Target.java b/tool/src/org/antlr/v4/codegen/target/Python2Target.java index e138019acf..1a2caff9f4 100644 --- a/tool/src/org/antlr/v4/codegen/target/Python2Target.java +++ b/tool/src/org/antlr/v4/codegen/target/Python2Target.java @@ -8,7 +8,6 @@ import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.Target; -import org.antlr.v4.codegen.UnicodeEscapes; import org.antlr.v4.tool.ast.GrammarAST; import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.StringRenderer; @@ -55,7 +54,7 @@ public class Python2Target extends Target { protected final Set badWords = new HashSet(); public Python2Target(CodeGenerator gen) { - super(gen, "Python2"); + super(gen); } @Override @@ -93,11 +92,6 @@ public boolean 
supportsOverloadedMethods() { return false; } - @Override - public String getVersion() { - return "4.9.3"; - } - public Set getBadWords() { if (badWords.isEmpty()) { addBadWords(); @@ -111,9 +105,4 @@ protected void addBadWords() { badWords.add("rule"); badWords.add("parserRule"); } - - @Override - protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) { - UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb); - } } diff --git a/tool/src/org/antlr/v4/codegen/target/Python3Target.java b/tool/src/org/antlr/v4/codegen/target/Python3Target.java index 97c7e2e4ad..d31df80093 100644 --- a/tool/src/org/antlr/v4/codegen/target/Python3Target.java +++ b/tool/src/org/antlr/v4/codegen/target/Python3Target.java @@ -8,7 +8,6 @@ import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.Target; -import org.antlr.v4.codegen.UnicodeEscapes; import org.antlr.v4.tool.ast.GrammarAST; import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.StringRenderer; @@ -51,7 +50,7 @@ public class Python3Target extends Target { }; public Python3Target(CodeGenerator gen) { - super(gen, "Python3"); + super(gen); } @Override @@ -95,11 +94,6 @@ public boolean supportsOverloadedMethods() { return false; } - @Override - public String getVersion() { - return "4.9.3"; - } - /** Avoid grammar symbols in this set to prevent conflicts in gen'd code. 
*/ protected final Set badWords = new HashSet(); @@ -116,9 +110,4 @@ protected void addBadWords() { badWords.add("rule"); badWords.add("parserRule"); } - - @Override - protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) { - UnicodeEscapes.appendPythonStyleEscapedCodePoint(codePoint, sb); - } } diff --git a/tool/src/org/antlr/v4/codegen/target/SwiftTarget.java b/tool/src/org/antlr/v4/codegen/target/SwiftTarget.java index 10b8ae1cf8..6317219b10 100644 --- a/tool/src/org/antlr/v4/codegen/target/SwiftTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/SwiftTarget.java @@ -8,7 +8,6 @@ import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.Target; -import org.antlr.v4.codegen.UnicodeEscapes; import org.antlr.v4.runtime.Token; import org.antlr.v4.runtime.atn.ATN; import org.antlr.v4.runtime.atn.ATNDeserializer; @@ -58,7 +57,6 @@ import java.util.Set; public class SwiftTarget extends Target { - /** * The Swift target can cache the code generation templates. 
*/ @@ -81,13 +79,9 @@ public class SwiftTarget extends Target { public String lexerAtnJSON = null; public String parserAtnJSON = null; - public SwiftTarget(CodeGenerator gen) { - super(gen, "Swift"); - } - @Override - public String getVersion() { - return "4.9.3"; // Java and tool versions move in lock step + public SwiftTarget(CodeGenerator gen) { + super(gen); } public Set getBadWords() { @@ -552,9 +546,4 @@ public String toString(Object o, String formatString, Locale locale) { } } - - @Override - protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) { - UnicodeEscapes.appendSwiftStyleEscapedCodePoint(codePoint, sb); - } } diff --git a/tool/src/org/antlr/v4/misc/CharSupport.java b/tool/src/org/antlr/v4/misc/CharSupport.java index 47e0033b7c..f4113fa7b4 100644 --- a/tool/src/org/antlr/v4/misc/CharSupport.java +++ b/tool/src/org/antlr/v4/misc/CharSupport.java @@ -17,11 +17,11 @@ public class CharSupport { /** When converting ANTLR char and string literals, here is the * value set of escape chars. */ - public static int ANTLRLiteralEscapedCharValue[] = new int[255]; + public static int[] ANTLRLiteralEscapedCharValue = new int[255]; /** Given a char, we need to be able to show as an ANTLR literal. 
*/ - public static String ANTLRLiteralCharValueEscape[] = new String[255]; + public static String[] ANTLRLiteralCharValueEscape = new String[255]; static { ANTLRLiteralEscapedCharValue['n'] = '\n'; diff --git a/tool/src/org/antlr/v4/misc/EscapeSequenceParsing.java b/tool/src/org/antlr/v4/misc/EscapeSequenceParsing.java index d34988d41b..94b7e397e8 100644 --- a/tool/src/org/antlr/v4/misc/EscapeSequenceParsing.java +++ b/tool/src/org/antlr/v4/misc/EscapeSequenceParsing.java @@ -139,7 +139,7 @@ else if (escaped == 'p' || escaped == 'P') { } String propertyName = s.substring(openBraceOffset + 1, closeBraceOffset); IntervalSet propertyIntervalSet = UnicodeData.getPropertyCodePoints(propertyName); - if (propertyIntervalSet == null) { + if (propertyIntervalSet == null || propertyIntervalSet.isNil()) { return invalid(startOff, closeBraceOffset); } offset = closeBraceOffset + 1; diff --git a/tool/src/org/antlr/v4/parse/ANTLRLexer.g b/tool/src/org/antlr/v4/parse/ANTLRLexer.g index ff127d5fbe..c84a25f706 100644 --- a/tool/src/org/antlr/v4/parse/ANTLRLexer.g +++ b/tool/src/org/antlr/v4/parse/ANTLRLexer.g @@ -446,7 +446,6 @@ FRAGMENT : 'fragment' ; LEXER : 'lexer' ; PARSER : 'parser' ; GRAMMAR : 'grammar' ; -TREE_GRAMMAR : 'tree' WSNLCHARS* 'grammar' ; PROTECTED : 'protected' ; PUBLIC : 'public' ; PRIVATE : 'private' ; diff --git a/tool/src/org/antlr/v4/parse/ANTLRParser.g b/tool/src/org/antlr/v4/parse/ANTLRParser.g index 9b4866521d..267f05c665 100644 --- a/tool/src/org/antlr/v4/parse/ANTLRParser.g +++ b/tool/src/org/antlr/v4/parse/ANTLRParser.g @@ -197,7 +197,6 @@ if ( options!=null ) { grammarType @after { - if ( $tg!=null ) throw new v3TreeGrammarException(tg); if ( $t!=null ) ((GrammarRootAST)$tree).grammarType = $t.type; else ((GrammarRootAST)$tree).grammarType=COMBINED; } @@ -207,8 +206,6 @@ grammarType // A combined lexer and parser specification | g=GRAMMAR -> GRAMMAR[$g, "COMBINED_GRAMMAR", getTokenStream()] - | tg=TREE_GRAMMAR - ) ; diff --git 
a/tool/src/org/antlr/v4/parse/v3TreeGrammarException.java b/tool/src/org/antlr/v4/parse/v3TreeGrammarException.java deleted file mode 100644 index dad0d6ebf5..0000000000 --- a/tool/src/org/antlr/v4/parse/v3TreeGrammarException.java +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -package org.antlr.v4.parse; - -import org.antlr.runtime.Token; -import org.antlr.v4.runtime.misc.ParseCancellationException; - -public class v3TreeGrammarException extends ParseCancellationException { - public Token location; - - public v3TreeGrammarException(Token location) { - this.location = location; - } -} diff --git a/tool/src/org/antlr/v4/tool/BuildDependencyGenerator.java b/tool/src/org/antlr/v4/tool/BuildDependencyGenerator.java index c33aa4d6e4..5c8069d287 100644 --- a/tool/src/org/antlr/v4/tool/BuildDependencyGenerator.java +++ b/tool/src/org/antlr/v4/tool/BuildDependencyGenerator.java @@ -68,8 +68,7 @@ public class BuildDependencyGenerator { public BuildDependencyGenerator(Tool tool, Grammar g) { this.tool = tool; this.g = g; - String language = g.getOptionString("language"); - generator = new CodeGenerator(tool, g, language); + generator = CodeGenerator.create(g); } /** From T.g return a list of File objects that diff --git a/tool/src/org/antlr/v4/tool/ErrorType.java b/tool/src/org/antlr/v4/tool/ErrorType.java index a0f374b333..800af17ea6 100644 --- a/tool/src/org/antlr/v4/tool/ErrorType.java +++ b/tool/src/org/antlr/v4/tool/ErrorType.java @@ -824,7 +824,7 @@ public enum ErrorType { * * @since 4.2.1 */ - INVALID_ESCAPE_SEQUENCE(156, "invalid escape sequence ", ErrorSeverity.WARNING), + INVALID_ESCAPE_SEQUENCE(156, "invalid escape sequence ", ErrorSeverity.ERROR), /** * Compiler Warning 157. * @@ -1108,6 +1108,28 @@ public enum ErrorType { ErrorSeverity.WARNING ), + /** + *

+ * rule <em>rule</em> contains a closure with at least one alternative + * that can match EOF

+ * + *

A rule contains a closure ({@code (...)*}) or positive closure + * ({@code (...)+}) around EOF.

+ * + *

The following rules produce this error.

+ * + *
+	 * x : EOF*;         // error
+	 * y : EOF+;         // error
+	 * z : EOF;         // ok
+	 * 
+ */ + EOF_CLOSURE( + 186, + "rule contains a closure with at least one alternative that can match EOF", + ErrorSeverity.ERROR + ), + /* * Backward incompatibility errors */ @@ -1123,6 +1145,7 @@ public enum ErrorType { * instead offers automatically generated parse tree listeners and visitors * as a more maintainable alternative.

*/ + @Deprecated V3_TREE_GRAMMAR(200, "tree grammars are not supported in ANTLR 4", ErrorSeverity.ERROR), /** * Compiler Warning 201. diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java index cbd4af76ad..7f63f74a17 100644 --- a/tool/src/org/antlr/v4/tool/Grammar.java +++ b/tool/src/org/antlr/v4/tool/Grammar.java @@ -793,8 +793,9 @@ public String[] getTokenLiteralNames() { } for (Map.Entry entry : stringLiteralToTypeMap.entrySet()) { - if (entry.getValue() >= 0 && entry.getValue() < literalNames.length && literalNames[entry.getValue()] == null) { - literalNames[entry.getValue()] = entry.getKey(); + int value = entry.getValue(); + if (value >= 0 && value < literalNames.length && literalNames[value] == null) { + literalNames[value] = entry.getKey(); } } @@ -886,7 +887,7 @@ public String getPredicateDisplayString(SemanticContext.Predicate pred) { public int getMaxCharValue() { return org.antlr.v4.runtime.Lexer.MAX_CHAR_VALUE; // if ( generator!=null ) { -// return generator.target.getMaxCharValue(generator); +// return generator.getTarget().getMaxCharValue(generator); // } // else { // return Label.MAX_CHAR_VALUE; @@ -1165,6 +1166,10 @@ public static String getGrammarTypeToFileNameSuffix(int type) { } } + public String getLanguage() { + return getOptionString("language"); + } + public String getOptionString(String key) { return ast.getOptionString(key); } /** Given ^(TOKEN_REF ^(OPTIONS ^(ELEMENT_OPTIONS (= assoc right))))