Skip to content

Commit 96e081f

Browse files
committed
ICU4J: Fix parser bug where backup() was counting in code units rather than code points
1 parent 825cdf9 commit 96e081f

File tree

4 files changed

+59
-34
lines changed

4 files changed

+59
-34
lines changed

Diff for: icu4j/main/core/src/main/java/com/ibm/icu/message2/InputSource.java

+14-2
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,22 @@ int readCodePoint() {
6262
return c;
6363
}
6464

65-
// Backup a number of characters.
65+
// Backup a number of code points.
6666
void backup(int amount) {
67+
int amountBackedUp = 0;
6768
// TODO: validate
68-
cursor -= amount;
69+
while (amountBackedUp < amount) {
70+
backupOneCodePoint();
71+
amountBackedUp++;
72+
}
73+
}
74+
75+
void backupOneCodePoint() {
76+
if (Character.isLowSurrogate(buffer.charAt(cursor - 1))) {
77+
cursor -= 2;
78+
} else {
79+
cursor--;
80+
}
6981
}
7082

7183
int getPosition() {

Diff for: icu4j/main/core/src/main/java/com/ibm/icu/message2/MFParser.java

+42-31
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,12 @@ private MFDataModel.Message parseImpl() throws MFParseException {
5353
cp = input.readCodePoint();
5454
cp = input.peekChar();
5555
if (cp == '{') { // `{{`, complex body without declarations
56-
input.backup(1); // let complexBody deal with the wrapping {{ and }}
56+
input.backupOneCodePoint(); // let complexBody deal with the wrapping {{ and }}
5757
MFDataModel.Pattern pattern = getQuotedPattern();
5858
skipOptionalWhitespaces();
5959
result = new MFDataModel.PatternMessage(new ArrayList<>(), pattern);
6060
} else { // placeholder
61-
input.backup(1); // We want the '{' present, to detect the part as placeholder.
61+
input.backupOneCodePoint(); // We want the '{' present, to detect the part as placeholder.
6262
MFDataModel.Pattern pattern = getPattern();
6363
result = new MFDataModel.PatternMessage(new ArrayList<>(), pattern);
6464
}
@@ -129,7 +129,7 @@ private String getText() {
129129
if (StringUtils.isContentChar(cp) || StringUtils.isWhitespace(cp)) {
130130
result.appendCodePoint(cp);
131131
} else {
132-
input.backup(1);
132+
input.backupOneCodePoint();
133133
return result.toString();
134134
}
135135
}
@@ -361,37 +361,48 @@ private MFDataModel.Attribute getAttribute() throws MFParseException {
361361
return null;
362362
}
363363

364-
// abnf: reserved-body = *([s] 1*(reserved-char / reserved-escape / quoted))
365-
// abnf: reserved-escape = backslash ( backslash / "{" / "|" / "}" )
364+
// abnf: reserved-body = reserved-body-part *([s] reserved-body-part)
366365
private String getReservedBody() throws MFParseException {
367366
int spaceCount = skipWhitespaces();
368367
StringBuilder result = new StringBuilder();
368+
input.dump();
369+
String firstPart = getReservedBodyPart();
370+
result.append(firstPart);
369371
while (true) {
370-
int cp = input.readCodePoint();
371-
if (StringUtils.isReservedChar(cp)) {
372-
result.appendCodePoint(cp);
373-
} else if (cp == '\\') {
374-
cp = input.readCodePoint();
375-
checkCondition(
376-
cp == '{' || cp == '|' || cp == '}',
377-
"Invalid escape sequence. Only \\{, \\| and \\} are valid here.");
378-
result.append(cp);
379-
} else if (cp == '|') {
380-
input.backup(1);
381-
MFDataModel.Literal quoted = getQuotedLiteral();
382-
result.append(quoted.value);
383-
} else if (cp == EOF) {
384-
return result.toString();
385-
} else {
386-
if (result.length() == 0) {
387-
input.backup(spaceCount + 1);
388-
return "";
389-
} else {
390-
input.backup(1);
391-
return result.toString();
392-
}
372+
spaceCount = skipWhitespaces();
373+
String nextPart = getReservedBodyPart();
374+
if (nextPart.length() == 0) {
375+
input.backup(spaceCount);
376+
break;
393377
}
378+
result.append(nextPart);
379+
}
380+
if (result.length() == 0) {
381+
input.backup(spaceCount);
382+
}
383+
return result.toString();
384+
}
385+
386+
// abnf: reserved-body-part = reserved-char / escaped-char / quoted-literal
387+
private String getReservedBodyPart() throws MFParseException {
388+
StringBuilder result = new StringBuilder();
389+
int cp = input.readCodePoint();
390+
if (StringUtils.isReservedChar(cp)) {
391+
result.appendCodePoint(cp);
392+
} else if (cp == '\\') {
393+
cp = input.readCodePoint();
394+
checkCondition(
395+
cp == '{' || cp == '|' || cp == '}',
396+
"Invalid escape sequence. Only \\{, \\| and \\} are valid here.");
397+
result.append(cp);
398+
} else if (cp == '|') {
399+
input.backup(1);
400+
MFDataModel.Literal quoted = getQuotedLiteral();
401+
result.append(quoted.value);
402+
} else {
403+
input.backup(1);
394404
}
405+
return result.toString();
395406
}
396407

397408
// abnf: identifier = [namespace ":"] name
@@ -408,7 +419,7 @@ private String getIdentifier() throws MFParseException {
408419
checkCondition(name != null, "Expected name after namespace '" + namespace + "'");
409420
return namespace + ":" + name;
410421
} else {
411-
input.backup(1);
422+
input.backupOneCodePoint();
412423
}
413424
return namespace;
414425
}
@@ -479,7 +490,7 @@ private MFDataModel.Literal getLiteral() throws MFParseException {
479490
MFDataModel.Literal ql = getQuotedLiteral();
480491
return ql;
481492
default: // unquoted
482-
input.backup(1);
493+
input.backupOneCodePoint();
483494
MFDataModel.Literal unql = getUnQuotedLiteral();
484495
return unql;
485496
}
@@ -561,7 +572,7 @@ private int skipWhitespaces() {
561572
return skipCount;
562573
}
563574
if (!StringUtils.isWhitespace(cp)) {
564-
input.backup(1);
575+
input.backupOneCodePoint();
565576
return skipCount;
566577
}
567578
skipCount++;

Diff for: icu4j/main/core/src/test/java/com/ibm/icu/dev/test/message2/CoreTest.java

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ public class CoreTest extends CoreTestFmwk {
3232
"more-functions.json",
3333
"more-functions-multiline.json",
3434
"reserved-syntax.json",
35+
"reserved-syntax-2.json",
3536
"resolution-errors.json",
3637
"runtime-errors.json",
3738
"spec/data-model-errors.json",

Diff for: testdata/message2/reserved-syntax.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@
5050
{ "src" : ".input{ $n ~ }{{{$n}}}", "ignoreJava": "ICU4J doesn't error out on reserved annotations" },
5151
{ "src" : ".i {1} {{}}", "ignoreJava": "ICU4J doesn't error out on reserved annotations", "expErrors": [{ "type": "unsupported-statement" }] },
5252
{ "src" : ".l $y = {|bar|} {{}}", "ignoreJava": "ICU4J doesn't error out on reserved annotations", "expErrors": [{ "type": "unsupported-statement" }] },
53-
{ "src" : ".򱋿󠆿 . |@| {$󛒁񓝖 & \\{ . @𯼼򋼡= $򵀀񓞈}{{\\\\}}", "ignoreJava": "ICU4J doesn't error out on reserved annotations", "expErrors": [{ "type": "unsupported-statement" }] }
53+
{ "src" : ".򱋿󠆿 . |@| {$󛒁񓝖 & \\{ . @𯼼򋼡= $򵀀񓞈}{{\\\\}}", "ignoreJava": "ICU4J doesn't error out on reserved annotations", "expErrors": [{ "type": "unsupported-statement" }] },
54+
{ "src" : "\\\\{ ? 󗟋 𫓜|@} |}", "ignoreJava": "ICU4J doesn't error out on reserved annotations" }
5455
]
5556
}
5657

0 commit comments

Comments
 (0)