Skip to content

Commit 4429088

Browse files
committed
ICU4J: Fix parser bug where backup() was counting in code units rather than code points
1 parent 7dd36f3 commit 4429088

File tree

4 files changed

+58
-34
lines changed

4 files changed

+58
-34
lines changed

Diff for: icu4j/main/core/src/main/java/com/ibm/icu/message2/InputSource.java

+14-2
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,22 @@ int readCodePoint() {
6262
return c;
6363
}
6464

65-
// Backup a number of characters.
65+
// Backup a number of code points.
6666
void backup(int amount) {
67+
int amountBackedUp = 0;
6768
// TODO: validate
68-
cursor -= amount;
69+
while (amountBackedUp < amount) {
70+
backupOneCodePoint();
71+
amountBackedUp++;
72+
}
73+
}
74+
75+
void backupOneCodePoint() {
76+
if (Character.isLowSurrogate(buffer.charAt(cursor - 1))) {
77+
cursor -= 2;
78+
} else {
79+
cursor--;
80+
}
6981
}
7082

7183
int getPosition() {

Diff for: icu4j/main/core/src/main/java/com/ibm/icu/message2/MFParser.java

+41-31
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,12 @@ private MFDataModel.Message parseImpl() throws MFParseException {
5353
cp = input.readCodePoint();
5454
cp = input.peekChar();
5555
if (cp == '{') { // `{{`, complex body without declarations
56-
input.backup(1); // let complexBody deal with the wrapping {{ and }}
56+
input.backupOneCodePoint(); // let complexBody deal with the wrapping {{ and }}
5757
MFDataModel.Pattern pattern = getQuotedPattern();
5858
skipOptionalWhitespaces();
5959
result = new MFDataModel.PatternMessage(new ArrayList<>(), pattern);
6060
} else { // placeholder
61-
input.backup(1); // We want the '{' present, to detect the part as placeholder.
61+
input.backupOneCodePoint(); // We want the '{' present, to detect the part as placeholder.
6262
MFDataModel.Pattern pattern = getPattern();
6363
result = new MFDataModel.PatternMessage(new ArrayList<>(), pattern);
6464
}
@@ -129,7 +129,7 @@ private String getText() {
129129
if (StringUtils.isContentChar(cp) || StringUtils.isWhitespace(cp)) {
130130
result.appendCodePoint(cp);
131131
} else {
132-
input.backup(1);
132+
input.backupOneCodePoint();
133133
return result.toString();
134134
}
135135
}
@@ -361,37 +361,47 @@ private MFDataModel.Attribute getAttribute() throws MFParseException {
361361
return null;
362362
}
363363

364-
// abnf: reserved-body = *([s] 1*(reserved-char / reserved-escape / quoted))
365-
// abnf: reserved-escape = backslash ( backslash / "{" / "|" / "}" )
364+
// abnf: reserved-body = reserved-body-part *([s] reserved-body-part)
366365
private String getReservedBody() throws MFParseException {
367366
int spaceCount = skipWhitespaces();
368367
StringBuilder result = new StringBuilder();
368+
String firstPart = getReservedBodyPart();
369+
result.append(firstPart);
369370
while (true) {
370-
int cp = input.readCodePoint();
371-
if (StringUtils.isReservedChar(cp)) {
372-
result.appendCodePoint(cp);
373-
} else if (cp == '\\') {
374-
cp = input.readCodePoint();
375-
checkCondition(
376-
cp == '{' || cp == '|' || cp == '}',
377-
"Invalid escape sequence. Only \\{, \\| and \\} are valid here.");
378-
result.append(cp);
379-
} else if (cp == '|') {
380-
input.backup(1);
381-
MFDataModel.Literal quoted = getQuotedLiteral();
382-
result.append(quoted.value);
383-
} else if (cp == EOF) {
384-
return result.toString();
385-
} else {
386-
if (result.length() == 0) {
387-
input.backup(spaceCount + 1);
388-
return "";
389-
} else {
390-
input.backup(1);
391-
return result.toString();
392-
}
371+
spaceCount = skipWhitespaces();
372+
String nextPart = getReservedBodyPart();
373+
if (nextPart.length() == 0) {
374+
input.backup(spaceCount);
375+
break;
393376
}
377+
result.append(nextPart);
378+
}
379+
if (result.length() == 0) {
380+
input.backup(spaceCount);
381+
}
382+
return result.toString();
383+
}
384+
385+
// abnf: reserved-body-part = reserved-char / escaped-char / quoted-literal
386+
private String getReservedBodyPart() throws MFParseException {
387+
StringBuilder result = new StringBuilder();
388+
int cp = input.readCodePoint();
389+
if (StringUtils.isReservedChar(cp)) {
390+
result.appendCodePoint(cp);
391+
} else if (cp == '\\') {
392+
cp = input.readCodePoint();
393+
checkCondition(
394+
cp == '{' || cp == '|' || cp == '}',
395+
"Invalid escape sequence. Only \\{, \\| and \\} are valid here.");
396+
result.append(cp);
397+
} else if (cp == '|') {
398+
input.backup(1);
399+
MFDataModel.Literal quoted = getQuotedLiteral();
400+
result.append(quoted.value);
401+
} else {
402+
input.backup(1);
394403
}
404+
return result.toString();
395405
}
396406

397407
// abnf: identifier = [namespace ":"] name
@@ -408,7 +418,7 @@ private String getIdentifier() throws MFParseException {
408418
checkCondition(name != null, "Expected name after namespace '" + namespace + "'");
409419
return namespace + ":" + name;
410420
} else {
411-
input.backup(1);
421+
input.backupOneCodePoint();
412422
}
413423
return namespace;
414424
}
@@ -479,7 +489,7 @@ private MFDataModel.Literal getLiteral() throws MFParseException {
479489
MFDataModel.Literal ql = getQuotedLiteral();
480490
return ql;
481491
default: // unquoted
482-
input.backup(1);
492+
input.backupOneCodePoint();
483493
MFDataModel.Literal unql = getUnQuotedLiteral();
484494
return unql;
485495
}
@@ -561,7 +571,7 @@ private int skipWhitespaces() {
561571
return skipCount;
562572
}
563573
if (!StringUtils.isWhitespace(cp)) {
564-
input.backup(1);
574+
input.backupOneCodePoint();
565575
return skipCount;
566576
}
567577
skipCount++;

Diff for: icu4j/main/core/src/test/java/com/ibm/icu/dev/test/message2/CoreTest.java

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ public class CoreTest extends CoreTestFmwk {
3030
"more-data-model-errors.json",
3131
"more-functions.json",
3232
"reserved-syntax.json",
33+
"reserved-syntax-2.json",
3334
"resolution-errors.json",
3435
"runtime-errors.json",
3536
"spec/data-model-errors.json",

Diff for: testdata/message2/reserved-syntax.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@
5050
{ "src" : ".input{ $n ~ }{{{$n}}}", "ignoreJava": "ICU4J doesn't error out on reserved annotations" },
5151
{ "src" : ".i {1} {{}}", "ignoreJava": "ICU4J doesn't error out on reserved annotations", "expErrors": [{ "type": "unsupported-statement" }] },
5252
{ "src" : ".l $y = {|bar|} {{}}", "ignoreJava": "ICU4J doesn't error out on reserved annotations", "expErrors": [{ "type": "unsupported-statement" }] },
53-
{ "src" : ".򱋿󠆿 . |@| {$󛒁񓝖 & \\{ . @𯼼򋼡= $򵀀񓞈}{{\\\\}}", "ignoreJava": "ICU4J doesn't error out on reserved annotations", "expErrors": [{ "type": "unsupported-statement" }] }
53+
{ "src" : ".򱋿󠆿 . |@| {$󛒁񓝖 & \\{ . @𯼼򋼡= $򵀀񓞈}{{\\\\}}", "ignoreJava": "ICU4J doesn't error out on reserved annotations", "expErrors": [{ "type": "unsupported-statement" }] },
54+
{ "src" : "\\\\{ ? 󗟋 𫓜|@} |}", "ignoreJava": "ICU4J doesn't error out on reserved annotations" }
5455
]
5556
}
5657

0 commit comments

Comments
 (0)