diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/swift/BaseSwiftTest.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/swift/BaseSwiftTest.java index 55358b2d00..6751b33c16 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/swift/BaseSwiftTest.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/swift/BaseSwiftTest.java @@ -396,12 +396,10 @@ private void generateParser(String grammarFileName, List files = new ArrayList<>(); if (lexerName != null) { files.add(lexerName + ".swift"); - files.add(lexerName + "ATN.swift"); } if (parserName != null) { files.add(parserName + ".swift"); - files.add(parserName + "ATN.swift"); Set optionsSet = new HashSet<>(Arrays.asList(extraOptions)); String grammarName = grammarFileName.substring(0, grammarFileName.lastIndexOf('.')); if (!optionsSet.contains("-no-listener")) { diff --git a/runtime/Swift/Sources/Antlr4/Parser.swift b/runtime/Swift/Sources/Antlr4/Parser.swift index 13a41f14da..c3d913fdd1 100644 --- a/runtime/Swift/Sources/Antlr4/Parser.swift +++ b/runtime/Swift/Sources/Antlr4/Parser.swift @@ -423,7 +423,7 @@ open class Parser: Recognizer { var opts = ATNDeserializationOptions() opts.generateRuleBypassTransitions = true - let result = try! ATNDeserializer(opts).deserialize(Array(serializedAtn)) + let result = try! ATNDeserializer(opts).deserialize(serializedAtn) bypassAltsAtnCache[serializedAtn] = result return result } diff --git a/runtime/Swift/Sources/Antlr4/atn/ATNDeserializer.swift b/runtime/Swift/Sources/Antlr4/atn/ATNDeserializer.swift index 9c831bdd61..7c811d4159 100644 --- a/runtime/Swift/Sources/Antlr4/atn/ATNDeserializer.swift +++ b/runtime/Swift/Sources/Antlr4/atn/ATNDeserializer.swift @@ -22,10 +22,10 @@ public class ATNDeserializer { self.deserializationOptions = deserializationOptions ?? ATNDeserializationOptions() } - public func deserialize(_ inData: [Character]) throws -> ATN { - let data = inData.map { Int(integerLiteral: $0.unicodeValue) } - + public func deserialize(_ str: String) throws -> ATN { + let data = str.utf16.map { element in Int(element) } var p = 0 + let version = data[p] p += 1 if version != ATNDeserializer.SERIALIZED_VERSION { @@ -57,8 +57,7 @@ public class ATNDeserializer { var ruleIndex = data[p] p += 1 - if ruleIndex == Int.max { - // Character.MAX_VALUE + if ruleIndex == UInt16.max { ruleIndex = -1 } @@ -117,7 +116,7 @@ public class ATNDeserializer { if atn.grammarType == ATNType.lexer { var tokenType = data[p] p += 1 - if tokenType == 0xFFFF { + if tokenType == UInt16.max { tokenType = CommonToken.EOF } @@ -180,12 +179,14 @@ public class ATNDeserializer { // let ndecisions = data[p] p += 1 - for i in 1...ndecisions { - let s = data[p] - p += 1 - let decState = atn.states[s] as! DecisionState - atn.appendDecisionToState(decState) - decState.decision = i - 1 + if (ndecisions >= 1) { + for i in 1...ndecisions { + let s = data[p] + p += 1 + let decState = atn.states[s] as! DecisionState + atn.appendDecisionToState(decState) + decState.decision = i - 1 + } } // @@ -200,13 +201,13 @@ public class ATNDeserializer { p += 1 var data1 = data[p] p += 1 - if data1 == 0xFFFF { + if data1 == UInt16.max { data1 = -1 } var data2 = data[p] p += 1 - if data2 == 0xFFFF { + if data2 == UInt16.max { data2 = -1 } @@ -220,7 +221,6 @@ public class ATNDeserializer { return atn } - private func readUnicodeInt(_ data: [Int], _ p: inout Int) -> Int { let result = data[p] p += 1 @@ -254,201 +254,6 @@ public class ATNDeserializer { } } - public func deserializeFromJson(_ jsonStr: String) -> ATN { - guard !jsonStr.isEmpty else { - fatalError("ATN Serialization is empty,Please include *LexerATN.json and *ParserATN.json in TARGETS-Build Phases-Copy Bundle Resources") - } - if let JSONData = jsonStr.data(using: .utf8) { - do { - let JSON = try JSONSerialization.jsonObject(with: JSONData, options: JSONSerialization.ReadingOptions(rawValue: 0)) - guard let JSONDictionary = JSON as? [String: Any] else { - fatalError("deserializeFromJson Not a Dictionary") - } - - return try dictToJson(JSONDictionary) - - } catch let JSONError { - print("\(JSONError)") - } - } - - fatalError("Could not deserialize ATN ") - } - - public func dictToJson(_ dict: [String: Any]) throws -> ATN { - let version = dict["version"] as! Int - if version != ATNDeserializer.SERIALIZED_VERSION { - let reason = "Could not deserialize ATN with version \(version) (expected \(ATNDeserializer.SERIALIZED_VERSION))." - throw ANTLRError.unsupportedOperation(msg: reason) - } - - let grammarType = ATNType(rawValue: dict["grammarType"] as! Int)! - let maxTokenType = dict["maxTokenType"] as! Int - let atn = ATN(grammarType, maxTokenType) - - // - // STATES - // - var loopBackStateNumbers = [(LoopEndState, Int)]() - var endStateNumbers = [(BlockStartState, Int)]() - - let states = dict["states"] as! [[String: Any]] - - for state in states { - let ruleIndex = state["ruleIndex"] as! Int - let stype = state["stateType"] as! Int - let s = try stateFactory(stype, ruleIndex)! - if stype == ATNState.LOOP_END { - // special case - let loopBackStateNumber = state["detailStateNumber"] as! Int - loopBackStateNumbers.append((s as! LoopEndState, loopBackStateNumber)) - } - else if let bsState = s as? BlockStartState { - let endStateNumber = state["detailStateNumber"] as! Int - endStateNumbers.append((bsState, endStateNumber)) - } - atn.addState(s) - } - - // delay the assignment of loop back and end states until we know all the state instances have been initialized - for pair in loopBackStateNumbers { - pair.0.loopBackState = atn.states[pair.1] - } - - for pair in endStateNumbers { - pair.0.endState = atn.states[pair.1] as? BlockEndState - } - - let numNonGreedyStates = dict["nonGreedyStates"] as! [Int] - for numNonGreedyState in numNonGreedyStates { - (atn.states[numNonGreedyState] as! DecisionState).nonGreedy = true - } - - let numPrecedenceStates = dict["precedenceStates"] as! [Int] - for numPrecedenceState in numPrecedenceStates { - (atn.states[numPrecedenceState] as! RuleStartState).isPrecedenceRule = true - } - - // - // RULES - // - let ruleToStartState = dict["ruleToStartState"] as! [[String: Any]] - let nrules = ruleToStartState.count - var ruleToTokenType = [Int]() - var ruleToStartStateParsed = [RuleStartState]() - for i in 0.. : { SerializedATN(model) ::= << private static char[] _serializedATN = { - , + '}; separator=",", wrap> }; public static readonly ATN _ATN = diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg index 72f104c28c..5c558fa11f 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg @@ -383,13 +383,12 @@ static std::vector\ _serializedATN; // Constructs the serialized ATN and writes init code for static member vars. SerializedATN(model) ::= << -[] = { - }> -\};}; separator="\n"> +static const uint16_t serializedATNSegment[] = { + }; separator=",", wrap> +}; -, - serializedATNSegment + sizeof(serializedATNSegment) / sizeof(serializedATNSegment[0])); -}> +_serializedATN.insert(_serializedATN.end(), serializedATNSegment, + serializedATNSegment + sizeof(serializedATNSegment) / sizeof(serializedATNSegment[0])); atn::ATNDeserializer deserializer; _atn = deserializer.deserialize(_serializedATN); diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Dart/Dart.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Dart/Dart.stg index 27de1c7818..059b845f5f 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Dart/Dart.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Dart/Dart.stg @@ -853,17 +853,8 @@ class extends { >> SerializedATN(model) ::= << - - - = - '<\t>'}>';}; separator="\n"> -static final String _serializedATN = [ - }; separator=",\n"> - ].join(); - - static const String _serializedATN = '<\t>'}>'; - + static final ATN _ATN = ATNDeserializer().deserialize(_serializedATN.codeUnits); >> diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/PHP/PHP.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/PHP/PHP.stg index fc8959eb37..ce1adca97e 100644 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/PHP/PHP.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/PHP/PHP.stg @@ -1209,19 +1209,8 @@ namespace { >> SerializedATN(model) ::= << - -/** - * @var string - */ -private const SERIALIZED_ATN = - "}>"}; separator=" .\n">; - -/** - * @var string - */ private const SERIALIZED_ATN = " "}>"; - >> /** diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Swift/Swift.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Swift/Swift.stg index 396946b98a..40235c0f00 100755 --- a/tool/resources/org/antlr/v4/tool/templates/codegen/Swift/Swift.stg +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Swift/Swift.stg @@ -300,10 +300,7 @@ case : - static let _serializedATN = ATN().jsonString - - - static let _ATN = ATNDeserializer().deserializeFromJson(_serializedATN) + static let _ATN = try! ATNDeserializer().deserialize(_serializedATN) } >> @@ -1047,19 +1044,14 @@ Lexer(lexer, atn, actionFuncs, sempredFuncs, superClass) ::= << - static let _serializedATN: String = ATN().jsonString - - - static let _ATN: ATN = ATNDeserializer().deserializeFromJson(_serializedATN) + static let _ATN: ATN = try! ATNDeserializer().deserialize(_serializedATN) } >> -/** Don't need to define anything. The tool generates a XParserATN.swift file (and same for lexer) - * which is referenced from static field _serializedATN. This json string is passed to - * deserializeFromJson(). Note this is not the "serialization as array of ints" that other targets - * do. It is more or less the output of ATNPrinter which gets read back in. - */ SerializedATN(model) ::= << +static let _serializedATN: String = """ + }> + """ >> /** Using a type to init value map, try to init a type; if not in table diff --git a/tool/src/org/antlr/v4/codegen/Target.java b/tool/src/org/antlr/v4/codegen/Target.java index 343ebb8720..e06cf288f5 100644 --- a/tool/src/org/antlr/v4/codegen/Target.java +++ b/tool/src/org/antlr/v4/codegen/Target.java @@ -26,36 +26,52 @@ import org.stringtemplate.v4.StringRenderer; import org.stringtemplate.v4.misc.STMessage; +import java.util.HashMap; +import java.util.Map; import java.util.Set; /** */ public abstract class Target { - /** For pure strings of Java 16-bit Unicode char, how can we display - * it in the target language as a literal. Useful for dumping + protected final CodeGenerator gen; + private STGroup templates; + private boolean isPreviousOctal = false; + + protected static final Map defaultCharValueEscape; + static { + // https://docs.oracle.com/javase/tutorial/java/data/characters.html + HashMap map = new HashMap<>(); + addEscapedChar(map, '\t', 't'); + addEscapedChar(map, '\b', 'b'); + addEscapedChar(map, '\n', 'n'); + addEscapedChar(map, '\r', 'r'); + addEscapedChar(map, '\f', 'f'); + addEscapedChar(map, '\''); + addEscapedChar(map, '\"'); + addEscapedChar(map, '\\'); + defaultCharValueEscape = map; + } + + protected Target(CodeGenerator gen) { + this.gen = gen; + } + + /** For pure strings of Unicode char, how can we display + * it in the target language as a literal. Useful for dumping * predicates and such that may refer to chars that need to be escaped * when represented as strings. Also, templates need to be escaped so * that the target language can hold them as a string. - *

- * I have defined (via the constructor) the set of typical escapes, - * but your {@link Target} subclass is free to alter the translated chars - * or add more definitions. This is non-static so each target can have - * a different set in memory at same time. + * Each target can have a different set in memory at same time. */ - protected String[] targetCharValueEscape = new String[255]; + public Map getTargetCharValueEscape() { + return defaultCharValueEscape; + } - protected final CodeGenerator gen; - private STGroup templates; + protected static void addEscapedChar(HashMap map, char key) { + addEscapedChar(map, key, key); + } - protected Target(CodeGenerator gen) { - targetCharValueEscape['\n'] = "\\n"; - targetCharValueEscape['\r'] = "\\r"; - targetCharValueEscape['\t'] = "\\t"; - targetCharValueEscape['\b'] = "\\b"; - targetCharValueEscape['\f'] = "\\f"; - targetCharValueEscape['\\'] = "\\\\"; - targetCharValueEscape['\''] = "\\'"; - targetCharValueEscape['"'] = "\\\""; - this.gen = gen; + protected static void addEscapedChar(HashMap map, char key, char representation) { + map.put(key, "\\" + representation); } public String getLanguage() { return gen.language; } @@ -157,13 +173,11 @@ public String getTargetStringLiteralFromString(String s, boolean quoted) { if ( quoted ) { buf.append('"'); } - for (int i=0; i= 0x20 && v < 127 && (!Character.isDigit(v) || v == '8' || v == '9')) { - return String.valueOf((char)v); + switch (Character.getType(c)) + { + case Character.CONTROL: + case Character.LINE_SEPARATOR: + case Character.PARAGRAPH_SEPARATOR: + return escapeChar(v); + default: + if (v == 0xfffe) { + return escapeChar(v); + } + + if (isPreviousOctal) { + char upperBound = language.equals("PHP") ? '9' : '7'; + if (c >= '0' && c <= upperBound) { + return escapeChar(v); + } + } + + isPreviousOctal = false; + return String.valueOf(c); } + } - if ( v<=127 ) { - String oct = Integer.toOctalString(v); - return "\\"+ oct; + private String escapeChar(int v) { + String language = getLanguage(); + + boolean isPhp = language.equals("PHP"); + boolean supportsOctalEncoding = language.equals("Java") + || language.equals("Python2") + || language.equals("Python3") + || isPhp; + if (supportsOctalEncoding && v <= (isPhp ? 127 : 255)) { + isPreviousOctal = true; + return String.format("\\%o", v); + } else { + isPreviousOctal = false; } - String hex = Integer.toHexString(v|0x10000).substring(1,5); - return "\\u"+hex; + switch (language) { + default: + case "Java": + case "JavaScript": + case "Python2": + case "Python3": + return String.format("\\u%04x", v); + case "CSharp": + return String.format("\\x%X", v); + case "Dart": + case "PHP": + case "Swift": + return String.format("\\u{%X}", v); + } } public String getLoopLabel(GrammarAST ast) { @@ -627,5 +690,5 @@ public boolean supportsOverloadedMethods() { } /** @since 4.6 */ - public boolean needsHeader() { return false; }; // Override in targets that need header files. + public boolean needsHeader() { return false; } // Override in targets that need header files. } diff --git a/tool/src/org/antlr/v4/codegen/model/SerializedATN.java b/tool/src/org/antlr/v4/codegen/model/SerializedATN.java index ece510b79c..366499bb84 100644 --- a/tool/src/org/antlr/v4/codegen/model/SerializedATN.java +++ b/tool/src/org/antlr/v4/codegen/model/SerializedATN.java @@ -12,32 +12,32 @@ import org.antlr.v4.runtime.atn.ATNSerializer; import org.antlr.v4.runtime.misc.IntegerList; -import java.util.ArrayList; -import java.util.List; - public class SerializedATN extends OutputModelObject { - // TODO: make this into a kind of decl or multiple? - public List serialized; + public final String[] serialized; + public final String[][] segments; + public SerializedATN(OutputModelFactory factory, ATN atn) { super(factory); Target target = factory.getGenerator().getTarget(); IntegerList data = ATNSerializer.getSerialized(atn, target.getLanguage()); - serialized = new ArrayList(data.size()); - for (int c : data.toArray()) { - String encoded = target.encodeIntAsCharEscape(c == -1 ? Character.MAX_VALUE : c); - serialized.add(encoded); + int size = data.size(); + int segmentLimit = target.getSerializedATNSegmentLimit(); + segments = new String[(int)(((long)size + segmentLimit - 1) / segmentLimit)][]; + int segmentIndex = 0; + + for (int i = 0; i < size; i += segmentLimit) { + int segmentSize = Math.min(i + segmentLimit, size) - i; + String[] segment = new String[segmentSize]; + segments[segmentIndex++] = segment; + for (int j = 0; j < segmentSize; j++) { + segment[j] = target.encodeIntAsCharEscape(data.get(i + j)); + } } -// System.out.println(ATNSerializer.getDecoded(factory.getGrammar(), atn)); + + serialized = segments[0]; } public String[][] getSegments() { - List segments = new ArrayList(); - int segmentLimit = factory.getGenerator().getTarget().getSerializedATNSegmentLimit(); - for (int i = 0; i < serialized.size(); i += segmentLimit) { - List currentSegment = serialized.subList(i, Math.min(i + segmentLimit, serialized.size())); - segments.add(currentSegment.toArray(new String[0])); - } - - return segments.toArray(new String[0][]); + return segments; } } diff --git a/tool/src/org/antlr/v4/codegen/target/CSharpTarget.java b/tool/src/org/antlr/v4/codegen/target/CSharpTarget.java index 0a30fdc0d5..8e0523f7fd 100644 --- a/tool/src/org/antlr/v4/codegen/target/CSharpTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/CSharpTarget.java @@ -15,9 +15,7 @@ import org.stringtemplate.v4.StringRenderer; import org.stringtemplate.v4.misc.STMessage; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; +import java.util.*; public class CSharpTarget extends Target { protected static final HashSet reservedWords = new HashSet<>(Arrays.asList( @@ -101,11 +99,31 @@ public class CSharpTarget extends Target { "while" )); + protected static final Map targetCharValueEscape; + static { + // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/strings/#string-escape-sequences + HashMap map = new HashMap<>(); + addEscapedChar(map, '\''); + addEscapedChar(map, '\"'); + addEscapedChar(map, '\\'); + addEscapedChar(map, '\0', '0'); + addEscapedChar(map, (char)0x0007, 'a'); + addEscapedChar(map, (char)0x0008, 'b'); + addEscapedChar(map, '\f', 'f'); + addEscapedChar(map, '\n', 'n'); + addEscapedChar(map, '\r', 'r'); + addEscapedChar(map, '\t', 't'); + addEscapedChar(map, (char)0x000B, 'v'); + targetCharValueEscape = map; + } + public CSharpTarget(CodeGenerator gen) { super(gen); - targetCharValueEscape[0] = "\\0"; - targetCharValueEscape[0x0007] = "\\a"; - targetCharValueEscape[0x000B] = "\\v"; + } + + @Override + public Map getTargetCharValueEscape() { + return targetCharValueEscape; } @Override @@ -118,26 +136,6 @@ protected String escapeWord(String word) { return "@" + word; } - @Override - public String encodeIntAsCharEscape(int v) { - if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) { - throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v)); - } - - String formatted; - if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) { - formatted = targetCharValueEscape[v]; - } - else if (v >= 0x20 && v < 127 && (v < '0' || v > '9') && (v < 'a' || v > 'f') && (v < 'A' || v > 'F')) { - formatted = Character.toString((char)v); - } - else { - formatted = String.format("\\x%X", v & 0xFFFF); - } - - return "'" + formatted + "'"; - } - @Override protected STGroup loadTemplates() { // override the superclass behavior to put all C# templates in the same folder diff --git a/tool/src/org/antlr/v4/codegen/target/CppTarget.java b/tool/src/org/antlr/v4/codegen/target/CppTarget.java index 58cbc4e29c..d5cc5fff57 100644 --- a/tool/src/org/antlr/v4/codegen/target/CppTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/CppTarget.java @@ -16,11 +16,28 @@ import org.stringtemplate.v4.StringRenderer; import org.stringtemplate.v4.misc.STMessage; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; +import java.util.*; public class CppTarget extends Target { + protected static final Map targetCharValueEscape; + static { + // https://stackoverflow.com/a/10220539/1046374 + HashMap map = new HashMap<>(); + addEscapedChar(map, (char)0x0007, 'a'); + addEscapedChar(map, (char)0x0008, 'b'); + addEscapedChar(map, '\t', 't'); + addEscapedChar(map, '\n', 'n'); + addEscapedChar(map, (char)0x000B, 'v'); + addEscapedChar(map, '\f', 'f'); + addEscapedChar(map, '\r', 'r'); + addEscapedChar(map, (char)0x001B, 'e'); + addEscapedChar(map, '\"'); + addEscapedChar(map, '\''); + addEscapedChar(map, '?'); + addEscapedChar(map, '\\'); + targetCharValueEscape = map; + } + protected static final HashSet reservedWords = new HashSet<>(Arrays.asList( "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break", "case", "catch", "char", "char16_t", @@ -43,7 +60,11 @@ public class CppTarget extends Target { public CppTarget(CodeGenerator gen) { super(gen); - targetCharValueEscape['?'] = "\\?"; + } + + @Override + public Map getTargetCharValueEscape() { + return targetCharValueEscape; } @Override @@ -65,11 +86,6 @@ protected boolean shouldUseUnicodeEscapeForCodePointInDoubleQuotedString(int cod } } - @Override - public String encodeIntAsCharEscape(int v) { - return "0x" + Integer.toHexString(v) + ", "; - } - @Override public String getRecognizerFileName(boolean header) { ST extST = getTemplates().getInstanceOf(header ? "headerFileExtension" : "codeFileExtension"); diff --git a/tool/src/org/antlr/v4/codegen/target/DartTarget.java b/tool/src/org/antlr/v4/codegen/target/DartTarget.java index 3b43c36636..3b3c6e47fe 100644 --- a/tool/src/org/antlr/v4/codegen/target/DartTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/DartTarget.java @@ -8,15 +8,22 @@ import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.Target; -import org.antlr.v4.tool.ast.GrammarAST; import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.StringRenderer; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; +import java.util.*; public class DartTarget extends Target { + protected static final Map targetCharValueEscape; + static { + HashMap map = new HashMap<>(); + for (Map.Entry entry : defaultCharValueEscape.entrySet()) { + map.put(entry.getKey(), entry.getValue()); + } + addEscapedChar(map, '$'); + targetCharValueEscape = map; + } + protected static final HashSet reservedWords = new HashSet<>(Arrays.asList( "abstract", "dynamic", "implements", "show", "as", "else", "import", "static", @@ -39,8 +46,11 @@ public class DartTarget extends Target { public DartTarget(CodeGenerator gen) { super(gen); + } - targetCharValueEscape['$'] = "\\$"; + @Override + public Map getTargetCharValueEscape() { + return targetCharValueEscape; } @Override @@ -60,13 +70,4 @@ protected STGroup loadTemplates() { return result; } - - @Override - public String encodeIntAsCharEscape(int v) { - if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) { - throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v)); - } - - return String.format("\\u{%X}", v & 0xFFFF); - } } diff --git a/tool/src/org/antlr/v4/codegen/target/GoTarget.java b/tool/src/org/antlr/v4/codegen/target/GoTarget.java index 279ff44ae5..900af8dc6c 100644 --- a/tool/src/org/antlr/v4/codegen/target/GoTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/GoTarget.java @@ -10,7 +10,6 @@ import org.antlr.v4.codegen.Target; import org.antlr.v4.parse.ANTLRParser; import org.antlr.v4.tool.Grammar; -import org.antlr.v4.tool.ast.GrammarAST; import org.stringtemplate.v4.ST; import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.StringRenderer; @@ -18,10 +17,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Locale; -import java.util.Set; +import java.util.*; public class GoTarget extends Target { protected static final HashSet reservedWords = new HashSet<>(Arrays.asList( @@ -98,11 +94,6 @@ private void gofmt(File fileName) { } } - @Override - public String encodeIntAsCharEscape(int v) { - return Integer.toString(v); - } - @Override public int getInlineTestSetWordSize() { return 32; diff --git a/tool/src/org/antlr/v4/codegen/target/JavaScriptTarget.java b/tool/src/org/antlr/v4/codegen/target/JavaScriptTarget.java index 78a60b1ec6..178f64567d 100644 --- a/tool/src/org/antlr/v4/codegen/target/JavaScriptTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/JavaScriptTarget.java @@ -51,24 +51,6 @@ protected Set getReservedWords() { return reservedWords; } - @Override - public String encodeIntAsCharEscape(int v) { - if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) { - throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v)); - } - - if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) { - return targetCharValueEscape[v]; - } - - if (v >= 0x20 && v < 127) { - return String.valueOf((char)v); - } - - String hex = Integer.toHexString(v|0x10000).substring(1,5); - return "\\u"+hex; - } - @Override public int getInlineTestSetWordSize() { return 32; diff --git a/tool/src/org/antlr/v4/codegen/target/JavaTarget.java b/tool/src/org/antlr/v4/codegen/target/JavaTarget.java index 305840e89c..b676ced52d 100644 --- a/tool/src/org/antlr/v4/codegen/target/JavaTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/JavaTarget.java @@ -11,10 +11,7 @@ import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.StringRenderer; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Locale; -import java.util.Set; +import java.util.*; public class JavaTarget extends Target { /** @@ -40,7 +37,7 @@ public JavaTarget(CodeGenerator gen) { super(gen); } - @Override + @Override public Set getReservedWords() { return reservedWords; } diff --git a/tool/src/org/antlr/v4/codegen/target/PHPTarget.java b/tool/src/org/antlr/v4/codegen/target/PHPTarget.java index 08558eb190..1b455c3dc1 100644 --- a/tool/src/org/antlr/v4/codegen/target/PHPTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/PHPTarget.java @@ -11,9 +11,7 @@ import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.StringRenderer; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; +import java.util.*; public class PHPTarget extends Target { protected static final HashSet reservedWords = new HashSet<>(Arrays.asList( @@ -45,23 +43,34 @@ public class PHPTarget extends Target { "rule", "parserRule" )); + protected static final Map targetCharValueEscape; + static { + // https://www.php.net/manual/en/language.types.string.php + HashMap map = new HashMap<>(); + addEscapedChar(map, '\n', 'n'); + addEscapedChar(map, '\r', 'r'); + addEscapedChar(map, '\t', 't'); + addEscapedChar(map, (char)0x000B, 'v'); + addEscapedChar(map, (char)0x001B, 'e'); + addEscapedChar(map, '\f', 'f'); + addEscapedChar(map, '\\'); + addEscapedChar(map, '$'); + addEscapedChar(map, '\"'); + targetCharValueEscape = map; + } + public PHPTarget(CodeGenerator gen) { super(gen); - targetCharValueEscape['$'] = "\\$"; } @Override - protected Set getReservedWords() { - return reservedWords; + public Map getTargetCharValueEscape() { + return targetCharValueEscape; } @Override - public String encodeIntAsCharEscape(int v) { - if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) { - throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v)); - } - - return String.format("\\u{%X}", v & 0xFFFF); + protected Set getReservedWords() { + return reservedWords; } @Override diff --git a/tool/src/org/antlr/v4/codegen/target/Python2Target.java b/tool/src/org/antlr/v4/codegen/target/Python2Target.java index 9061fb1405..8db5d017ef 100644 --- a/tool/src/org/antlr/v4/codegen/target/Python2Target.java +++ b/tool/src/org/antlr/v4/codegen/target/Python2Target.java @@ -8,14 +8,10 @@ import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.Target; -import org.antlr.v4.tool.ast.GrammarAST; import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.StringRenderer; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Locale; -import java.util.Set; +import java.util.*; public class Python2Target extends Target { protected static final HashSet reservedWords = new HashSet<>(Arrays.asList( @@ -49,10 +45,32 @@ public class Python2Target extends Target { "rule", "parserRule" )); + protected static final Map targetCharValueEscape; + static { + // https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals + HashMap map = new HashMap<>(); + addEscapedChar(map, '\\'); + addEscapedChar(map, '\''); + addEscapedChar(map, '\"'); + addEscapedChar(map, (char)0x0007, 'a'); + addEscapedChar(map, (char)0x0008, 'b'); + addEscapedChar(map, '\f', 'f'); + addEscapedChar(map, '\n', 'n'); + addEscapedChar(map, '\r', 'r'); + addEscapedChar(map, '\t', 't'); + addEscapedChar(map, (char)0x000B, 'v'); + targetCharValueEscape = map; + } + public Python2Target(CodeGenerator gen) { super(gen); } + @Override + public Map getTargetCharValueEscape() { + return targetCharValueEscape; + } + @Override protected Set getReservedWords() { return reservedWords; diff --git a/tool/src/org/antlr/v4/codegen/target/Python3Target.java b/tool/src/org/antlr/v4/codegen/target/Python3Target.java index acd0c395f2..c4684cc68b 100644 --- a/tool/src/org/antlr/v4/codegen/target/Python3Target.java +++ b/tool/src/org/antlr/v4/codegen/target/Python3Target.java @@ -11,10 +11,7 @@ import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.StringRenderer; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Locale; -import java.util.Set; +import java.util.*; public class Python3Target extends Target { protected static final HashSet reservedWords = new HashSet<>(Arrays.asList( @@ -47,10 +44,32 @@ public class Python3Target extends Target { "rule", "parserRule" )); + protected static final Map targetCharValueEscape; + static { + // https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals + HashMap map = new HashMap<>(); + addEscapedChar(map, '\\'); + addEscapedChar(map, '\''); + addEscapedChar(map, '\"'); + addEscapedChar(map, (char)0x0007, 'a'); + addEscapedChar(map, (char)0x0008, 'b'); + addEscapedChar(map, '\f', 'f'); + addEscapedChar(map, '\n', 'n'); + addEscapedChar(map, '\r', 'r'); + addEscapedChar(map, '\t', 't'); + addEscapedChar(map, (char)0x000B, 'v'); + targetCharValueEscape = map; + } + public Python3Target(CodeGenerator gen) { super(gen); } + @Override + public Map getTargetCharValueEscape() { + return targetCharValueEscape; + } + @Override protected Set getReservedWords() { return reservedWords; diff --git a/tool/src/org/antlr/v4/codegen/target/SwiftTarget.java b/tool/src/org/antlr/v4/codegen/target/SwiftTarget.java index ff689c894a..8722386cd7 100644 --- a/tool/src/org/antlr/v4/codegen/target/SwiftTarget.java +++ b/tool/src/org/antlr/v4/codegen/target/SwiftTarget.java @@ -8,50 +8,14 @@ import org.antlr.v4.codegen.CodeGenerator; import org.antlr.v4.codegen.Target; -import org.antlr.v4.runtime.Token; -import org.antlr.v4.runtime.atn.ATN; -import org.antlr.v4.runtime.atn.ATNDeserializer; -import org.antlr.v4.runtime.atn.ATNState; -import org.antlr.v4.runtime.atn.ATNType; -import org.antlr.v4.runtime.atn.ActionTransition; -import org.antlr.v4.runtime.atn.AtomTransition; -import org.antlr.v4.runtime.atn.BlockStartState; -import org.antlr.v4.runtime.atn.DecisionState; -import org.antlr.v4.runtime.atn.LexerAction; -import org.antlr.v4.runtime.atn.LexerChannelAction; -import org.antlr.v4.runtime.atn.LexerCustomAction; -import org.antlr.v4.runtime.atn.LexerModeAction; -import org.antlr.v4.runtime.atn.LexerPushModeAction; -import org.antlr.v4.runtime.atn.LexerTypeAction; -import org.antlr.v4.runtime.atn.LoopEndState; -import org.antlr.v4.runtime.atn.PrecedencePredicateTransition; -import org.antlr.v4.runtime.atn.PredicateTransition; -import org.antlr.v4.runtime.atn.RangeTransition; -import org.antlr.v4.runtime.atn.RuleStartState; -import org.antlr.v4.runtime.atn.RuleTransition; -import org.antlr.v4.runtime.atn.SetTransition; -import org.antlr.v4.runtime.atn.Transition; -import org.antlr.v4.runtime.misc.IntegerList; -import org.antlr.v4.runtime.misc.Interval; -import org.antlr.v4.runtime.misc.IntervalSet; -import org.antlr.v4.tool.ErrorType; import org.antlr.v4.tool.Grammar; -import org.antlr.v4.tool.ast.GrammarAST; import org.stringtemplate.v4.ST; import org.stringtemplate.v4.STGroup; import org.stringtemplate.v4.StringRenderer; -import javax.json.Json; -import javax.json.JsonArrayBuilder; -import javax.json.JsonObject; -import javax.json.JsonObjectBuilder; -import java.io.IOException; -import java.io.Writer; -import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; -import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; @@ -60,7 +24,21 @@ public class SwiftTarget extends Target { /** * The Swift target can cache the code generation templates. */ - private static final ThreadLocal targetTemplates = new ThreadLocal(); + private static final ThreadLocal targetTemplates = new ThreadLocal<>(); + + protected static final Map targetCharValueEscape; + static { + // https://docs.swift.org/swift-book/LanguageGuide/StringsAndCharacters.html + HashMap map = new HashMap<>(); + addEscapedChar(map, '\0', '0'); + addEscapedChar(map, '\\'); + addEscapedChar(map, '\t', 't'); + addEscapedChar(map, '\n', 'n'); + addEscapedChar(map, '\r', 'r'); + addEscapedChar(map, '\"'); + addEscapedChar(map, '\''); + targetCharValueEscape = map; + } protected static final HashSet reservedWords = new HashSet<>(Arrays.asList( "associatedtype", "class", "deinit", "enum", "extension", "func", "import", "init", "inout", "internal", @@ -76,13 +54,15 @@ public class SwiftTarget extends Target { "rule", "parserRule" )); - public String lexerAtnJSON = null; - public String parserAtnJSON = null; - public SwiftTarget(CodeGenerator gen) { super(gen); } + @Override + public Map getTargetCharValueEscape() { + return targetCharValueEscape; + } + @Override protected Set getReservedWords() { return reservedWords; @@ -94,66 +74,10 @@ protected String escapeWord(String word) { } @Override - protected void genFile(Grammar g, - ST outputFileST, - String fileName) - { + protected void genFile(Grammar g, ST outputFileST, String fileName) { super.genFile(g,outputFileST,fileName); - - if (g.isLexer() && lexerAtnJSON == null) { - lexerAtnJSON = getLexerOrParserATNJson(g, fileName); - } - else if (!g.isLexer() && parserAtnJSON == null && g.atn != null) { - parserAtnJSON = getLexerOrParserATNJson(g, fileName); - } - - if (fileName.endsWith(CodeGenerator.VOCAB_FILE_EXTENSION)) { - String jsonFileName = fileName.substring(0,fileName.lastIndexOf(CodeGenerator.VOCAB_FILE_EXTENSION)); - if (lexerAtnJSON != null) { - jsonFileName = jsonFileName + "ATN.swift"; - // System.out.println(jsonFileName); - //System.out.println(lexerAtnJSON); - writeFile(lexerAtnJSON,g,jsonFileName); - } - - if (parserAtnJSON != null) { - jsonFileName = jsonFileName + "ParserATN.swift"; - // System.out.println(jsonFileName); - //System.out.println(parserAtnJSON); - writeFile(parserAtnJSON,g,jsonFileName); - } - } - -// else if (g instanceof ParseR) { -// System.out.println("parserGrammar"); -// } -// - //getCodeGenerator().write(outputFileST, fileName); - } - - private String getLexerOrParserATNJson(Grammar g, String fileName) { - ST extST = getTemplates().getInstanceOf("codeFileExtension"); - String className = fileName.substring(0,fileName.lastIndexOf(extST.render())); - - String JSON = "class " + className + "ATN {\n" + - " let jsonString: String = \"" + - serializeTojson(g.atn).replaceAll("\"","\\\\\"") +"\"\n}" ; //.replaceAll("\"", "\\\\\""); - return JSON; } - private void writeFile(String content,Grammar g,String fileName) { - - try { - Writer w = this.getCodeGenerator().tool.getOutputFileWriter(g, fileName); - w.write(content); - w.close(); - } - catch (IOException ioe) { - this.getCodeGenerator().tool.errMgr.toolError(ErrorType.CANNOT_WRITE_FILE, - ioe, - fileName); - } - } @Override protected STGroup loadTemplates() { STGroup result = targetTemplates.get(); @@ -165,359 +89,8 @@ protected STGroup loadTemplates() { return result; } - //added by janyou --> - public String serializeTojson(ATN atn) { - JsonObjectBuilder builder = Json.createObjectBuilder(); - builder.add("version", ATNDeserializer.SERIALIZED_VERSION); - - // convert grammar type to ATN const to avoid dependence on ANTLRParser - builder.add("grammarType",atn.grammarType.ordinal()); - builder.add("maxTokenType",atn.maxTokenType); - - //states - int nedges = 0; - - Map setIndices = new HashMap(); - List sets = new ArrayList(); - JsonArrayBuilder statesBuilder = Json.createArrayBuilder() ; - IntegerList nonGreedyStates = new IntegerList(); - IntegerList precedenceStates = new IntegerList(); - for (ATNState s : atn.states) { - JsonObjectBuilder stateBuilder = Json.createObjectBuilder(); - if ( s==null ) { // might be optimized away - statesBuilder.addNull(); - continue; - } - - int stateType = s.getStateType(); - - stateBuilder.add("stateType",stateType); - //stateBuilder.add("stateNumber",s.stateNumber); - stateBuilder.add("ruleIndex",s.ruleIndex); - - if (s instanceof DecisionState && ((DecisionState)s).nonGreedy) { - nonGreedyStates.add(s.stateNumber); - } - - if (s instanceof RuleStartState && ((RuleStartState)s).isLeftRecursiveRule) { - precedenceStates.add(s.stateNumber); - } - - - if ( s.getStateType() == ATNState.LOOP_END ) { - stateBuilder.add("detailStateNumber",((LoopEndState)s).loopBackState.stateNumber); - } - else if ( s instanceof BlockStartState ) { - stateBuilder.add("detailStateNumber",((BlockStartState)s).endState.stateNumber); - } - - if (s.getStateType() != ATNState.RULE_STOP) { - // the deserializer can trivially derive these edges, so there's no need to serialize them - nedges += s.getNumberOfTransitions(); - } - for (int i=0; i0 ) { - for (ATNState modeStartState : atn.modeToStartState) { - - modeToStartStateBuilder.add(modeStartState.stateNumber); - } - } - builder.add("modeToStartState",modeToStartStateBuilder); - - - JsonArrayBuilder nsetsBuilder = Json.createArrayBuilder() ; - int nsets = sets.size(); - //data.add(nsets); - builder.add("nsets",nsets); - - for (IntervalSet set : sets) { - JsonObjectBuilder setBuilder = Json.createObjectBuilder(); - boolean containsEof = set.contains(Token.EOF); - if (containsEof && set.getIntervals().get(0).b == Token.EOF) { - //data.add(set.getIntervals().size() - 1); - - setBuilder.add("size",set.getIntervals().size() - 1); - } - else { - //data.add(set.getIntervals().size()); - - setBuilder.add("size",set.getIntervals().size()); - } - setBuilder.add("containsEof",containsEof ? 1 : 0); - JsonArrayBuilder IntervalsBuilder = Json.createArrayBuilder() ; - for (Interval I : set.getIntervals()) { - JsonObjectBuilder IntervalBuilder = Json.createObjectBuilder(); - if (I.a == Token.EOF) { - if (I.b == Token.EOF) { - continue; - } - else { - IntervalBuilder.add("a",0); - //data.add(0); - } - } - else { - IntervalBuilder.add("a",I.a); - - //data.add(I.a); - } - IntervalBuilder.add("b",I.b); - IntervalsBuilder.add(IntervalBuilder); - } - setBuilder.add("Intervals",IntervalsBuilder); - nsetsBuilder.add(setBuilder); - } - - builder.add("IntervalSet",nsetsBuilder); - //builder.add("nedges",nedges); - JsonArrayBuilder allTransitionsBuilder = Json.createArrayBuilder() ; - - for (ATNState s : atn.states) { - - if ( s==null ) { - // might be optimized away - continue; - } - - if (s.getStateType() == ATNState.RULE_STOP) { - continue; - } - JsonArrayBuilder transitionsBuilder = Json.createArrayBuilder() ; - - for (int i=0; i Character.MAX_VALUE) { -// throw new UnsupportedOperationException("Serialized ATN data element out of range."); -// } -// -// int value = (data.get(i) + 2) & 0xFFFF; -// data.set(i, value); -// } - JsonObject data = builder.build(); - // System.out.print(data.toString()); - return data.toString(); - } - //<-- protected static class SwiftStringRenderer extends StringRenderer { - @Override public String toString(Object o, String formatString, Locale locale) { if ("java-escape".equals(formatString)) { @@ -527,6 +100,5 @@ public String toString(Object o, String formatString, Locale locale) { return super.toString(o, formatString, locale); } - } }