antlr · parrt · Dec 28, 2021 · Nov 6, 2021 · Nov 6, 2021 · Nov 12, 2021
diff --git a/...testsuite/resources/org/antlr/v4/test/runtime/descriptors/LexerExec/EscapedCharacters.txt b/...testsuite/resources/org/antlr/v4/test/runtime/descriptors/LexerExec/EscapedCharacters.txt
@@ -0,0 +1,16 @@
+[type]
+Lexer
+
+[grammar]
+lexer grammar L;
+LF : '\\u000A';
+X  : 'x';
+
+[input]
+"""x
+"""
+
+[output]
+[@0,0:0='x',<2>,1:0]
+[@1,1:1='\n',<1>,1:1]
+[@2,2:1='<EOF>',<-1>,2:0]
diff --git a/...ime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/ParserExec/EOFInClosure.txt b/...ime-testsuite/resources/org/antlr/v4/test/runtime/descriptors/ParserExec/EOFInClosure.txt
diff --git a/...org/antlr/v4/test/runtime/descriptors/SemPredEvalParser/PredFromAltTestedInLoopBack_1.txt b/...org/antlr/v4/test/runtime/descriptors/SemPredEvalParser/PredFromAltTestedInLoopBack_1.txt
@@ -0,0 +1,44 @@
+[notes]
+Loopback doesn't eval predicate at start of alt
+
+[type]
+Parser
+
+[grammar]
+grammar T;
+file_
+@after {<ToStringTree("$ctx"):writeln()>}
+  : para para EOF ;
+para: paraContent NL NL ;
+paraContent : ('s'|'x'|{<LANotEquals("2",{T<ParserToken("Parser", "NL")>})>}? NL)+ ;
+NL : '\n' ;
+s : 's' ;
+X : 'x' ;
+
+[start]
+file_
+
+[input]
+"""s
+
+
+x
+"""
+
+[output]
+"""(file_ (para (paraContent s) \n \n) (para (paraContent \n x \n)) <EOF>)
+"""
+
+[errors]
+"""line 5:0 mismatched input '<EOF>' expecting {'s', '\n', 'x'}
+"""
+
+[skip]
+Cpp
+CSharp
+Dart
+Go
+Node
+PHP
+Python2
+Python3
diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/java/BaseJavaTest.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/java/BaseJavaTest.java
@@ -461,7 +461,7 @@ public void testActions(String templates, String actionName, String action, Stri
             AnalysisPipeline anal = new AnalysisPipeline(g);
             anal.process();
 
-			CodeGenerator gen = new CodeGenerator(g);
+			CodeGenerator gen = CodeGenerator.create(g);
 			ST outputFileST = gen.generateParser(false);
 			String output = outputFileST.render();
 			//System.out.println(output);

diff --git a/tool-testsuite/test/org/antlr/v4/misc/UtilsTest.java b/tool-testsuite/test/org/antlr/v4/misc/UtilsTest.java
@@ -1,6 +1,7 @@
 package org.antlr.v4.misc;
 
 import org.antlr.runtime.Token;
+import org.antlr.v4.codegen.CodeGenerator;
 import org.antlr.v4.tool.ast.GrammarAST;
 import org.junit.Assert;
 import org.junit.Test;

diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestCodeGeneration.java
@@ -130,7 +130,7 @@ public List<String> getEvalInfoForString(String grammarString, String pattern) t
 			if (g.isLexer()) factory = new LexerATNFactory((LexerGrammar) g);
 			g.atn = factory.createATN();
 
-			CodeGenerator gen = new CodeGenerator(g);
+			CodeGenerator gen = CodeGenerator.create(g);
 			ST outputFileST = gen.generateParser();
 
 //			STViz viz = outputFileST.inspect();

diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java
@@ -345,6 +345,42 @@ public void AllErrorCodesDistinct() {
 		super.testErrors(pair, true);
 	}
 
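+	// Test for https://github.com/antlr/antlr4/issues/2860 and https://github.com/antlr/antlr4/issues/1105: a closure in a lexer rule with an alternative that can match the empty string, e.g. ('x'|)+, is expected to be reported as EPSILON_CLOSURE.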
+	@Test public void testEpsilonClosureInLexer() {
+		String grammar =
+				"lexer grammar T;\n" +
+				"TOKEN: '\\'' FRAGMENT '\\'';\n" +
+				"fragment FRAGMENT: ('x'|)+;";
+
+		String expected =
+			"error(" + ErrorType.EPSILON_CLOSURE.code + "): T.g4:3:9: rule FRAGMENT contains a closure with at least one alternative that can match an empty string\n";
+
+		String[] pair = new String[] {
+				grammar,
+				expected
+		};
+
+		super.testErrors(pair, true);
+	}
+
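+	// Test for https://github.com/antlr/antlr4/issues/3359: EOF inside a lexer closure (EOF*) is expected to be reported as EOF_CLOSURE; EOF used as a plain alternative ('z' | EOF) is not expected to be flagged.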
+	@Test public void testEofClosure() {
+		String grammar =
+				"lexer grammar EofClosure;\n" +
+				"EofClosure: 'x' EOF*;\n" +
+				"EofInAlternative: 'y' ('z' | EOF);";
+
+		String expected =
+			"error(" + ErrorType.EOF_CLOSURE.code + "): EofClosure.g4:2:0: rule EofClosure contains a closure with at least one alternative that can match EOF\n";
+
+		String[] pair = new String[] {
+				grammar,
+				expected
+		};
+
+		super.testErrors(pair, true);
+	}
+
 	// Test for https://github.com/antlr/antlr4/issues/1203
 	@Test public void testEpsilonOptionalAndClosureAnalysis() {
 		String grammar =
@@ -452,8 +488,8 @@ public void AllErrorCodesDistinct() {
 			"lexer grammar A;\n" +
 			"STRING : '\\\"' '\\\"' 'x' ;";
 		String expected =
-			"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:10: invalid escape sequence \\\"\n"+
-			"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:15: invalid escape sequence \\\"\n";
+			"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:10: invalid escape sequence \\\"\n"+
+			"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:15: invalid escape sequence \\\"\n";
 
 		String[] pair = new String[] {
 			grammar,
@@ -493,10 +529,9 @@ public void AllErrorCodesDistinct() {
 			"lexer grammar A;\n" +
 			"RULE : 'Foo \\uAABG \\x \\u';\n";
 		String expected =
-			"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence \\uAABG\n" +
-			"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence \\x\n" +
-			"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n" +
-			"warning("+ErrorType.EPSILON_TOKEN.code+"): A.g4:2:0: non-fragment lexer rule RULE can match the empty string\n";
+			"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence \\uAABG\n" +
+			"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence \\x\n" +
+			"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n";
 
 		String[] pair = new String[] {
 			grammar,
@@ -536,7 +571,6 @@ public void AllErrorCodesDistinct() {
 	@Test public void testInvalidCharSetsAndStringLiterals() {
 		String grammar =
 				"lexer grammar Test;\n" +
-				"INVALID_STRING_LITERAL:       '\\\"' | '\\]' | '\\u24';\n" +
 				"INVALID_STRING_LITERAL_RANGE: 'GH'..'LM';\n" +
 				"INVALID_CHAR_SET:             [\\u24\\uA2][\\{];\n" +  //https://github.com/antlr/antlr4/issues/1077
 				"EMPTY_STRING_LITERAL_RANGE:   'F'..'A' | 'Z';\n" +
@@ -549,21 +583,14 @@ public void AllErrorCodesDistinct() {
 				"EMPTY_CHAR_SET_WITH_INVALID_ESCAPE_SEQUENCE: [\\'];";  // https://github.com/antlr/antlr4/issues/1556
 
 		String expected =
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:31: invalid escape sequence \\\"\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:38: invalid escape sequence \\]\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:45: invalid escape sequence \\u24\n" +
-				"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:30: multi-character literals are not allowed in lexer sets: 'GH'\n" +
-				"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:36: multi-character literals are not allowed in lexer sets: 'LM'\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence \\u24\\u\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:4:30: string literals and sets cannot be empty: []\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence \\{\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:4:40: string literals and sets cannot be empty: []\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:11:84: invalid escape sequence \\'\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:11:84: string literals and sets cannot be empty: []\n" +
-				"warning("+ ErrorType.EPSILON_TOKEN.code + "): Test.g4:2:0: non-fragment lexer rule INVALID_STRING_LITERAL can match the empty string\n";
+				"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:2:30: multi-character literals are not allowed in lexer sets: 'GH'\n" +
+				"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:2:36: multi-character literals are not allowed in lexer sets: 'LM'\n" +
+				"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:30: invalid escape sequence \\u24\\u\n" +
+				"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:40: invalid escape sequence \\{\n" +
+				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:4:33: string literals and sets cannot be empty: 'F'..'A'\n" +
+				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:30: string literals and sets cannot be empty: 'f'..'a'\n" +
+				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:36: string literals and sets cannot be empty: []\n" +
+				"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:10:84: invalid escape sequence \\'\n";
 
 		String[] pair = new String[] {
 				grammar,
@@ -587,33 +614,23 @@ public void AllErrorCodesDistinct() {
 				"UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}];\n" +
 				"UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE_2: [\\p{Letter}-Z];\n" +
 				"UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE_3: [A-\\p{Number}];\n" +
-				"INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n";
+				"INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n" +
+				"EMOJI_MODIFIER: [\\p{Grapheme_Cluster_Break=E_Base}];\n";
 
 		String expected =
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:32: invalid escape sequence \\u{}\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:2:32: string literals and sets cannot be empty: []\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:41: invalid escape sequence \\u{\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:3:41: string literals and sets cannot be empty: []\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:35: invalid escape sequence \\u{110\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:4:35: string literals and sets cannot be empty: []\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:5:32: invalid escape sequence \\p{}\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:32: string literals and sets cannot be empty: []\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:41: invalid escape sequence \\p{\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:41: string literals and sets cannot be empty: []\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:41: invalid escape sequence \\P{}\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:7:41: string literals and sets cannot be empty: []\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:8:34: invalid escape sequence \\p{NotAProperty}\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:8:34: string literals and sets cannot be empty: []\n" +
-				"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:9:43: invalid escape sequence \\P{NotAProperty}\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:9:43: string literals and sets cannot be empty: []\n" +
+				"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:32: invalid escape sequence \\u{}\n" +
+				"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:41: invalid escape sequence \\u{\n" +
+				"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:35: invalid escape sequence \\u{110\n" +
+				"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:5:32: invalid escape sequence \\p{}\n" +
+				"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:41: invalid escape sequence \\p{\n" +
+				"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:41: invalid escape sequence \\P{}\n" +
+				"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:8:34: invalid escape sequence \\p{NotAProperty}\n" +
+				"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:9:43: invalid escape sequence \\P{NotAProperty}\n" +
 				"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:10:39: unicode property escapes not allowed in lexer charset range: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}]\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:10:39: string literals and sets cannot be empty: []\n" +
 				"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:11:41: unicode property escapes not allowed in lexer charset range: [\\p{Letter}-Z]\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:11:41: string literals and sets cannot be empty: []\n" +
 				"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:12:41: unicode property escapes not allowed in lexer charset range: [A-\\p{Number}]\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:12:41: string literals and sets cannot be empty: []\n" +
 				"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:13:48: unicode property escapes not allowed in lexer charset range: [\\P{Uppercase_Letter}-\\P{Number}]\n" +
-				"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:13:48: string literals and sets cannot be empty: []\n";
+				"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:14:16: invalid escape sequence \\p{Grapheme_Cluster_Break=E_Base}\n";
 
 		String[] pair = new String[] {
 				grammar,
@@ -839,4 +856,11 @@ public void AllErrorCodesDistinct() {
 
 		super.testErrors(pair, true);
 	}
+
+	@Test public void testRuleNamesAsTree() {
+		String grammar = "" +
+				"grammar T;\n" +
+				"tree : 'X';";
+		super.testErrors(new String[] { grammar, "" }, true);
+	}
 }
diff --git a/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java b/tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java
@@ -7,72 +7,60 @@
 package org.antlr.v4.test.tool;
 
 import org.antlr.v4.codegen.UnicodeEscapes;
-
 import org.junit.Test;
 
 import static org.junit.Assert.assertEquals;
 
 public class TestUnicodeEscapes {
 	@Test
 	public void latinJavaEscape() {
-		StringBuilder sb = new StringBuilder();
-		UnicodeEscapes.appendJavaStyleEscapedCodePoint(0x0061, sb);
-		assertEquals("\\u0061", sb.toString());
+		checkUnicodeEscape("\\u0061", 0x0061, "Java");
 	}
 
 	@Test
 	public void latinPythonEscape() {
-		StringBuilder sb = new StringBuilder();
-		UnicodeEscapes.appendPythonStyleEscapedCodePoint(0x0061, sb);
-		assertEquals("\\u0061", sb.toString());
+		checkUnicodeEscape("\\u0061", 0x0061, "Python2");
+		checkUnicodeEscape("\\u0061", 0x0061, "Python3");
 	}
 
 	@Test
 	public void latinSwiftEscape() {
-		StringBuilder sb = new StringBuilder();
-		UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0x0061, sb);
-		assertEquals("\\u{0061}", sb.toString());
+		checkUnicodeEscape("\\u{0061}", 0x0061, "Swift");
 	}
 
 	@Test
 	public void bmpJavaEscape() {
-		StringBuilder sb = new StringBuilder();
-		UnicodeEscapes.appendJavaStyleEscapedCodePoint(0xABCD, sb);
-		assertEquals("\\uABCD", sb.toString());
+		checkUnicodeEscape("\\uABCD", 0xABCD, "Java");
 	}
 
 	@Test
 	public void bmpPythonEscape() {
-		StringBuilder sb = new StringBuilder();
-		UnicodeEscapes.appendPythonStyleEscapedCodePoint(0xABCD, sb);
-		assertEquals("\\uABCD", sb.toString());
+		checkUnicodeEscape("\\uABCD", 0xABCD, "Python2");
+		checkUnicodeEscape("\\uABCD", 0xABCD, "Python3");
 	}
 
 	@Test
 	public void bmpSwiftEscape() {
-		StringBuilder sb = new StringBuilder();
-		UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0xABCD, sb);
-		assertEquals("\\u{ABCD}", sb.toString());
+		checkUnicodeEscape("\\u{ABCD}", 0xABCD, "Swift");
 	}
 
 	@Test
 	public void smpJavaEscape() {
-		StringBuilder sb = new StringBuilder();
-		UnicodeEscapes.appendJavaStyleEscapedCodePoint(0x1F4A9, sb);
-		assertEquals("\\uD83D\\uDCA9", sb.toString());
+		checkUnicodeEscape("\\uD83D\\uDCA9", 0x1F4A9, "Java");
 	}
 
 	@Test
 	public void smpPythonEscape() {
-		StringBuilder sb = new StringBuilder();
-		UnicodeEscapes.appendPythonStyleEscapedCodePoint(0x1F4A9, sb);
-		assertEquals("\\U0001F4A9", sb.toString());
+		checkUnicodeEscape("\\U0001F4A9", 0x1F4A9, "Python2");
+		checkUnicodeEscape("\\U0001F4A9", 0x1F4A9, "Python3");
 	}
 
 	@Test
 	public void smpSwiftEscape() {
-		StringBuilder sb = new StringBuilder();
-		UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0x1F4A9, sb);
-		assertEquals("\\u{1F4A9}", sb.toString());
+		checkUnicodeEscape("\\u{1F4A9}", 0x1F4A9, "Swift");
+	}
+
+	private void checkUnicodeEscape(String expected, int input, String language) {
+		assertEquals(expected, UnicodeEscapes.escapeCodePoint(input, language));
 	}
 }