From c50edd677ffdef4e4df09f13034da9c9a670dfb0 Mon Sep 17 00:00:00 2001 From: trobro Date: Sat, 14 Oct 2023 17:50:17 +0200 Subject: [PATCH] Escape all control chars (#36) --- assets/charset2_result.hjson | 5 +++ assets/charset2_result.json | 5 +++ assets/charset2_test.hjson | 5 +++ assets/testlist.txt | 3 +- src/main/org/hjson/HjsonWriter.java | 70 ++++++++--------------------- src/main/org/hjson/JsonWriter.java | 46 +++++++------------ src/test/org/hjson/test/Main.java | 2 - 7 files changed, 51 insertions(+), 85 deletions(-) create mode 100644 assets/charset2_result.hjson create mode 100644 assets/charset2_result.json create mode 100644 assets/charset2_test.hjson diff --git a/assets/charset2_result.hjson b/assets/charset2_result.hjson new file mode 100644 index 0000000..e04f07b --- /dev/null +++ b/assets/charset2_result.hjson @@ -0,0 +1,5 @@ +{ + uescape: "\u0000,\u0001,\uffff" + "um\u000blaut": äöüßÄÖÜ + hex: ģ䕧覫췯ꯍ +} \ No newline at end of file diff --git a/assets/charset2_result.json b/assets/charset2_result.json new file mode 100644 index 0000000..7e4c374 --- /dev/null +++ b/assets/charset2_result.json @@ -0,0 +1,5 @@ +{ + "uescape": "\u0000,\u0001,\uffff", + "um\u000blaut": "äöüßÄÖÜ", + "hex": "ģ䕧覫췯ꯍ" +} \ No newline at end of file diff --git a/assets/charset2_test.hjson b/assets/charset2_test.hjson new file mode 100644 index 0000000..41a685d --- /dev/null +++ b/assets/charset2_test.hjson @@ -0,0 +1,5 @@ +{ + uescape: "\u0000,\u0001,\uffff" + "um\u000blaut": äöüßÄÖÜ + hex: "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A" +} \ No newline at end of file diff --git a/assets/testlist.txt b/assets/testlist.txt index 49adf34..07cdf03 100644 --- a/assets/testlist.txt +++ b/assets/testlist.txt @@ -1,4 +1,5 @@ charset_test.hjson +charset2_test.hjson comments_test.hjson empty_test.hjson failCharset1_test.hjson @@ -83,4 +84,4 @@ stringify/quotes_strings_ml_test.json stringify/quotes_strings_test.hjson extra/notabs_test.json extra/root_test.hjson -extra/separator_test.json \ No newline at end of file +extra/separator_test.json diff --git a/src/main/org/hjson/HjsonWriter.java b/src/main/org/hjson/HjsonWriter.java index 2331a9f..fc024c2 100644 --- a/src/main/org/hjson/HjsonWriter.java +++ b/src/main/org/hjson/HjsonWriter.java @@ -30,6 +30,13 @@ class HjsonWriter { private IHjsonDsfProvider[] dsfProviders; + static String commonRange = "\\x7f-\\x9f\\x{00ad}\\x{0600}-\\x{0604}\\x{070f}\\x{17b4}\\x{17b5}\\x{200c}-\\x{200f}\\x{2028}-\\x{202f}\\x{2060}-\\x{206f}\\x{feff}\\x{fff0}-\\x{ffff}"; + // needsEscape tests if the string can be written without escapes + static Pattern needsEscape = Pattern.compile("[\\\\\\\"\\x00-\\x1f" + commonRange + "]"); + // needsQuotes tests if the string can be written as a quoteless string (includes needsEscape but without \\\\ and \\") + static Pattern needsQuotes = Pattern.compile("^\\s|^\"|^'|^#|^/\\*|^//|^\\{|^\\}|^\\[|^\\]|^:|^,|\\s$|[\\x00-\\x1f\\x7f-\\x9f\\x{00ad}\\x{0600}-\\x{0604}\\x{070f}\\x{17b4}\\x{17b5}\\x{200c}-\\x{200f}\\x{2028}-\\x{202f}\\x{2060}-\\x{206f}\\x{feff}\\x{fff0}-\\x{ffff}]"); + // needsEscapeML tests if the string can be written as a multiline string (like needsEscape but without \\n, \\\\, \\", \\t) + static Pattern needsEscapeML = Pattern.compile("'''|^[\\s]+$|[\\x00-\\x08\\x0b-\\x1f" + commonRange + "]"); static Pattern needsEscapeName=Pattern.compile("[,\\{\\[\\}\\]\\s:#\"']|//|/\\*"); public HjsonWriter(HjsonOptions options) { @@ -104,10 +111,13 @@ public void save(JsonValue value, Writer tw, int level, String separator, boolea } static String escapeName(String name) { - if (name.length()==0 || needsEscapeName.matcher(name).find()) + if (name.length()==0 || needsEscapeName.matcher(name).find() || + needsEscape.matcher(name).find()) + { return "\""+JsonWriter.escapeString(name)+"\""; - else + } else { return name; + } } void writeString(String value, Writer tw, int level, String separator) throws IOException { @@ -115,11 +125,7 @@ void writeString(String value, Writer tw, int level, String separator) throws IO char left=value.charAt(0), right=value.charAt(value.length()-1); char left1=value.length()>1?value.charAt(1):'\0', left2=value.length()>2?value.charAt(2):'\0'; - boolean doEscape=false; - char[] valuec=value.toCharArray(); - for(char ch : valuec) { - if (needsQuotes(ch)) { doEscape=true; break; } - } + boolean doEscape=needsQuotes.matcher(value).find(); if (doEscape || HjsonParser.isWhiteSpace(left) || HjsonParser.isWhiteSpace(right) || @@ -136,17 +142,13 @@ void writeString(String value, Writer tw, int level, String separator) throws IO // format or we must replace the offending characters with safe escape // sequences. - boolean noEscape=true; - for(char ch : valuec) { if (needsEscape(ch)) { noEscape=false; break; } } - if (noEscape) { tw.write(separator+"\""+value+"\""); return; } - - boolean noEscapeML=true, allWhite=true; - for(char ch : valuec) { - if (needsEscapeML(ch)) { noEscapeML=false; break; } - else if (!HjsonParser.isWhiteSpace(ch)) allWhite=false; + if (!needsEscape.matcher(value).find()) { + tw.write(separator+"\""+value+"\""); + } else if (!needsEscapeML.matcher(value).find()) { + writeMLString(value, tw, level, separator); + } else { + tw.write(separator+"\""+JsonWriter.escapeString(value)+"\""); } - if (noEscapeML && !allWhite && !value.contains("'''")) writeMLString(value, tw, level, separator); - else tw.write(separator+"\""+JsonWriter.escapeString(value)+"\""); } else tw.write(separator+value); } @@ -183,38 +185,4 @@ static boolean startsWithKeyword(String text) { char ch=text.charAt(p); return ch==',' || ch=='}' || ch==']' || ch=='#' || ch=='/' && (text.length()>p+1 && (text.charAt(p+1)=='/' || text.charAt(p+1)=='*')); } - - static boolean needsQuotes(char c) { - switch (c) { - case '\t': - case '\f': - case '\b': - case '\n': - case '\r': - return true; - default: - return false; - } - } - - static boolean needsEscape(char c) { - switch (c) { - case '\"': - case '\\': - return true; - default: - return needsQuotes(c); - } - } - - static boolean needsEscapeML(char c) { - switch (c) { - case '\n': - case '\r': - case '\t': - return false; - default: - return needsQuotes(c); - } - } } diff --git a/src/main/org/hjson/JsonWriter.java b/src/main/org/hjson/JsonWriter.java index 20eb877..f8b58e8 100644 --- a/src/main/org/hjson/JsonWriter.java +++ b/src/main/org/hjson/JsonWriter.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.io.Writer; +import java.util.regex.Matcher; class JsonWriter { @@ -90,42 +91,25 @@ public void save(JsonValue value, Writer tw, int level) throws IOException { } } - static String escapeName(String name) { - boolean needsEscape=name.length()==0; - for(char ch : name.toCharArray()) { - if (HjsonParser.isWhiteSpace(ch) || ch=='{' || ch=='}' || ch=='[' || ch==']' || ch==',' || ch==':') { - needsEscape=true; - break; - } - } - if (needsEscape) return "\""+JsonWriter.escapeString(name)+"\""; - else return name; - } - static String escapeString(String src) { if (src==null) return null; - for (int i=0; i0) sb.append(src, 0, i); - return doEscapeString(sb, src, i); - } + int i = 0; + StringBuilder sb=new StringBuilder(); + Matcher m = HjsonWriter.needsEscape.matcher(src); + + while (m.find()) { + // Assume all matches are single chars. + sb.append(src, i, m.start()).append(getEscapedChar(m.group().charAt(0))); + i = m.end(); } - return src; - } - private static String doEscapeString(StringBuilder sb, String src, int cur) { - int start=cur; - for (int i=cur; i=0 && file.substring(extIdx).equals(".json"); boolean shouldFail=name.startsWith("fail"); JsonValue.setEol(outputCr?"\r\n":"\n");