Skip to content

Commit

Permalink
escape all control chars
Browse files Browse the repository at this point in the history
  • Loading branch information
trobro committed Oct 14, 2023
1 parent d7a1076 commit 136f7bd
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 83 deletions.
5 changes: 5 additions & 0 deletions assets/charset2_result.hjson
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
uescape: "\u0000,\u0001,\uffff"
"um\u000blaut": äöüßÄÖÜ
hex: ģ䕧覫췯ꯍ
}
5 changes: 5 additions & 0 deletions assets/charset2_result.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"uescape": "\u0000,\u0001,\uffff",
"um\u000blaut": "äöüßÄÖÜ",
"hex": "ģ䕧覫췯ꯍ"
}
5 changes: 5 additions & 0 deletions assets/charset2_test.hjson
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
uescape: "\u0000,\u0001,\uffff"
"um\u000blaut": äöüßÄÖÜ
hex: "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A"
}
3 changes: 2 additions & 1 deletion assets/testlist.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
charset_test.hjson
charset2_test.hjson
comments_test.hjson
empty_test.hjson
failCharset1_test.hjson
Expand Down Expand Up @@ -83,4 +84,4 @@ stringify/quotes_strings_ml_test.json
stringify/quotes_strings_test.hjson
extra/notabs_test.json
extra/root_test.hjson
extra/separator_test.json
extra/separator_test.json
70 changes: 19 additions & 51 deletions src/main/org/hjson/HjsonWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ class HjsonWriter {

private IHjsonDsfProvider[] dsfProviders;

// commonRange is the body of a regex character class (no surrounding brackets): DEL and the C1
// controls (0x7f-0x9f) plus the listed code points/ranges up to 0xffff. It is spliced into every
// bracket expression below so that all patterns agree on the same set.
static String commonRange = "\\x7f-\\x9f\\x{00ad}\\x{0600}-\\x{0604}\\x{070f}\\x{17b4}\\x{17b5}\\x{200c}-\\x{200f}\\x{2028}-\\x{202f}\\x{2060}-\\x{206f}\\x{feff}\\x{fff0}-\\x{ffff}";
// needsEscape finds a character that cannot appear verbatim inside a double-quoted string:
// backslash, double quote, 0x00-0x1f, or anything in commonRange. A match means escaping is required.
static Pattern needsEscape = Pattern.compile("[\\\\\\\"\\x00-\\x1f" + commonRange + "]");
// needsQuotes finds a reason the string cannot be written quoteless: leading whitespace, quote,
// '#', comment opener, bracket/brace, ':' or ','; trailing whitespace; or any character from
// 0x00-0x1f or commonRange (the needsEscape set minus backslash and double quote).
// The character class reuses commonRange instead of repeating the literal so the sets cannot drift.
static Pattern needsQuotes = Pattern.compile("^\\s|^\"|^'|^#|^/\\*|^//|^\\{|^\\}|^\\[|^\\]|^:|^,|\\s$|[\\x00-\\x1f" + commonRange + "]");
// needsEscapeML finds a reason the string cannot be written as a ''' multiline string: it contains
// ''', it is whitespace only, or it holds a character from 0x00-0x08, 0x0b-0x1f or commonRange
// (tab, line feed, backslash and double quote are therefore allowed).
static Pattern needsEscapeML = Pattern.compile("'''|^[\\s]+$|[\\x00-\\x08\\x0b-\\x1f" + commonRange + "]");
// needsEscapeName finds punctuation, whitespace, quote characters or a comment opener inside a
// member name; a match means the name has to be written quoted.
static Pattern needsEscapeName=Pattern.compile("[,\\{\\[\\}\\]\\s:#\"']|//|/\\*");

public HjsonWriter(HjsonOptions options) {
Expand Down Expand Up @@ -104,22 +111,21 @@ public void save(JsonValue value, Writer tw, int level, String separator, boolea
}

// Returns the member name as it should be written: wrapped in double quotes and passed through
// JsonWriter.escapeString when quoting is required, otherwise unchanged.
// NOTE(review): this span is a diff rendering without +/- markers, so two versions of the
// condition (and of the else branch) appear back to back. Presumably the longer condition that
// also consults needsEscape is the replacement - confirm against the commit.
static String escapeName(String name) {
// Shorter variant: quote when the name is empty or needsEscapeName matches.
if (name.length()==0 || needsEscapeName.matcher(name).find())
// Longer variant: additionally quote when needsEscape matches anywhere in the name.
if (name.length()==0 || needsEscapeName.matcher(name).find() ||
needsEscape.matcher(name).find())
{
return "\""+JsonWriter.escapeString(name)+"\"";
else
} else {
return name;
}
}

// Writes a string value to tw, prefixed by separator, choosing one of the forms visible below:
// "" for the empty string, the bare value (quoteless), the value in double quotes verbatim,
// a multiline string via writeMLString, or double quotes around JsonWriter.escapeString(value).
// NOTE(review): this span is a diff rendering without +/- markers and with an elided hunk in the
// middle, so a char-loop implementation (needsQuotes(ch) / needsEscape(ch) / needsEscapeML(ch))
// and a regex implementation (needsQuotes / needsEscape / needsEscapeML matchers) are interleaved.
// Presumably the regex lines are the replacement - confirm against the commit.
void writeString(String value, Writer tw, int level, String separator) throws IOException {
if (value.length()==0) { tw.write(separator+"\"\""); return; }

// First/last characters and the two after the first feed the quoting decision below;
// '\0' stands in when the string is too short.
char left=value.charAt(0), right=value.charAt(value.length()-1);
char left1=value.length()>1?value.charAt(1):'\0', left2=value.length()>2?value.charAt(2):'\0';
// Char-loop variant: doEscape becomes true as soon as one character satisfies needsQuotes(ch).
boolean doEscape=false;
char[] valuec=value.toCharArray();
for(char ch : valuec) {
if (needsQuotes(ch)) { doEscape=true; break; }
}
// Regex variant: doEscape is true when the needsQuotes pattern matches anywhere in value.
boolean doEscape=needsQuotes.matcher(value).find();

// The rest of this condition is not visible here (elided by the hunk boundary that follows).
if (doEscape ||
HjsonParser.isWhiteSpace(left) || HjsonParser.isWhiteSpace(right) ||
Expand All @@ -136,17 +142,13 @@ void writeString(String value, Writer tw, int level, String separator) throws IO
// format or we must replace the offending characters with safe escape
// sequences.

// Char-loop variant: if no character needs escaping, write the value in plain double quotes.
boolean noEscape=true;
for(char ch : valuec) { if (needsEscape(ch)) { noEscape=false; break; } }
if (noEscape) { tw.write(separator+"\""+value+"\""); return; }

// Char-loop variant: check multiline eligibility and whether the value is whitespace only.
boolean noEscapeML=true, allWhite=true;
for(char ch : valuec) {
if (needsEscapeML(ch)) { noEscapeML=false; break; }
else if (!HjsonParser.isWhiteSpace(ch)) allWhite=false;
// Regex variant: plain quotes if nothing needs escaping, else a multiline string if
// needsEscapeML does not match, else a quoted string with escape sequences.
if (!needsEscape.matcher(value).find()) {
tw.write(separator+"\""+value+"\"");
} else if (!needsEscapeML.matcher(value).find()) {
writeMLString(value, tw, level, separator);
} else {
tw.write(separator+"\""+JsonWriter.escapeString(value)+"\"");
}
// Char-loop variant: multiline only when eligible, not all whitespace and free of '''.
if (noEscapeML && !allWhite && !value.contains("'''")) writeMLString(value, tw, level, separator);
else tw.write(separator+"\""+JsonWriter.escapeString(value)+"\"");
}
// No quoting needed: emit the value as a quoteless string.
else tw.write(separator+value);
}
Expand Down Expand Up @@ -183,38 +185,4 @@ static boolean startsWithKeyword(String text) {
char ch=text.charAt(p);
return ch==',' || ch=='}' || ch==']' || ch=='#' || ch=='/' && (text.length()>p+1 && (text.charAt(p+1)=='/' || text.charAt(p+1)=='*'));
}

/**
 * Tells whether a character forces a string to be written in quotes.
 *
 * @param c the character to inspect
 * @return {@code true} for tab, form feed, backspace, line feed and carriage return;
 *         {@code false} for every other character
 */
static boolean needsQuotes(char c) {
    return c == '\t' || c == '\f' || c == '\b' || c == '\n' || c == '\r';
}

/**
 * Tells whether a character has to be replaced by an escape sequence inside a quoted string.
 *
 * @param c the character to inspect
 * @return {@code true} for a double quote, a backslash, or any character for which
 *         {@link #needsQuotes(char)} is {@code true}
 */
static boolean needsEscape(char c) {
    if (c == '\"' || c == '\\') {
        return true;
    }
    return needsQuotes(c);
}

/**
 * Tells whether a character prevents a string from being written as a multiline string.
 *
 * @param c the character to inspect
 * @return {@code false} for line feed, carriage return and tab; otherwise the result of
 *         {@link #needsQuotes(char)}
 */
static boolean needsEscapeML(char c) {
    // Line breaks and tabs are written as-is in a multiline string.
    boolean allowedVerbatim = c == '\n' || c == '\r' || c == '\t';
    if (allowedVerbatim) {
        return false;
    }
    return needsQuotes(c);
}
}
46 changes: 15 additions & 31 deletions src/main/org/hjson/JsonWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

import java.io.IOException;
import java.io.Writer;
import java.util.regex.Matcher;


class JsonWriter {
Expand Down Expand Up @@ -90,42 +91,25 @@ public void save(JsonValue value, Writer tw, int level) throws IOException {
}
}

/**
 * Returns the member name in the form it should be written.
 *
 * @param name the member name; must not be {@code null}
 * @return the name wrapped in double quotes and run through {@link #escapeString(String)} when it
 *         is empty or contains whitespace or one of <code>{ } [ ] , :</code>; otherwise the name
 *         itself
 */
static String escapeName(String name) {
    boolean quote = name.length() == 0;
    // Stop scanning at the first character that forces quoting.
    for (int i = 0; !quote && i < name.length(); i++) {
        char ch = name.charAt(i);
        quote = HjsonParser.isWhiteSpace(ch) || "{}[],:".indexOf(ch) >= 0;
    }
    return quote ? "\"" + JsonWriter.escapeString(name) + "\"" : name;
}

// Returns src with every character that needs it replaced by the text from getEscapedChar;
// returns null for null input and src itself when nothing has to be replaced.
// NOTE(review): this span is a diff rendering without +/- markers, so two implementations are
// interleaved: a loop-based one (getEscapedChar(...)!=null plus the doEscapeString helper) and a
// regex-based one driven by HjsonWriter.needsEscape. Presumably the regex lines are the
// replacement - confirm against the commit.
static String escapeString(String src) {
if (src==null) return null;

// Loop variant: find the first character that has an escape, copy the prefix before it and
// hand the rest to doEscapeString.
for (int i=0; i<src.length(); i++) {
if (getEscapedChar(src.charAt(i))!=null) {
StringBuilder sb=new StringBuilder();
if (i>0) sb.append(src, 0, i);
return doEscapeString(sb, src, i);
}
// Regex variant: i tracks the end of the previous match, i.e. the start of the next
// unescaped run to copy.
int i = 0;
StringBuilder sb=new StringBuilder();
Matcher m = HjsonWriter.needsEscape.matcher(src);

while (m.find()) {
// Assume all matches are single chars.
sb.append(src, i, m.start()).append(getEscapedChar(m.group().charAt(0)));
i = m.end();
}
return src;
}

// Loop variant helper: appends src from index cur onward to sb, substituting escapes, and
// returns the finished string.
private static String doEscapeString(StringBuilder sb, String src, int cur) {
int start=cur;
for (int i=cur; i<src.length(); i++) {
String escaped=getEscapedChar(src.charAt(i));
if (escaped!=null) {
sb.append(src, start, i);
sb.append(escaped);
start=i+1;
}
// Regex variant: i is still 0 only when the matcher found nothing, so src is returned as-is.
if (i < 1) {
return src;
}
sb.append(src, start, src.length());

// Regex variant: copy whatever follows the last match.
sb.append(src, i, src.length());

return sb.toString();
}

Expand All @@ -138,7 +122,7 @@ private static String getEscapedChar(char c) {
case '\f': return "\\f";
case '\b': return "\\b";
case '\\': return "\\\\";
default: return null;
default: return "\\u" + String.format("%04x", (int) c);
}
}
}

0 comments on commit 136f7bd

Please sign in to comment.