Skip to content

Commit

Permalink
ICU4J: Undo change to backup() and make uses of backup() and readCode…
Browse files Browse the repository at this point in the history
…Point() consistent

Fix three locations where `cp = readCodePoint()` was followed by `backup(1)`, in a context where
`cp` might be a wide char:

`getIdentifier()`: Use `peekChar()` rather than `readCodePoint()` followed by `backup()`
   (an identifier could be followed by a wide char that is not a name-char)
`skipWhitespaces()`: The same change (a whitespace could be followed by a wide char)
`getName()`: names can include wide chars

The tests are in 4429088
  • Loading branch information
catamorphism committed Aug 9, 2024
1 parent cb3594b commit eee547c
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 25 deletions.
34 changes: 20 additions & 14 deletions icu4j/main/core/src/main/java/com/ibm/icu/message2/InputSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,24 @@ int peekChar() {
return buffer.charAt(cursor);
}

/**
 * Returns the code point at the current cursor position without advancing the cursor.
 *
 * <p>A high surrogate immediately followed by a low surrogate is combined into a single
 * supplementary code point. An unpaired surrogate (a high surrogate at the end of the
 * buffer or followed by a non-low-surrogate, or a lone low surrogate) is returned as-is.
 *
 * @return the code point at the cursor, or {@code -1} when {@code atEnd()} reports true
 */
int peekCodePoint() {
    if (atEnd()) {
        return -1;
    }
    // Character.codePointAt applies exactly the surrogate-pair rules described above,
    // so there is no need to decode UTF-16 by hand here.
    return Character.codePointAt(buffer, cursor);
}

int readCodePoint() {
// TODO: remove this?
// START Detect possible infinite loop
Expand Down Expand Up @@ -62,22 +80,10 @@ int readCodePoint() {
return c;
}

// Backup a number of code points.
// Backup a number of characters.
void backup(int amount) {
int amountBackedUp = 0;
// TODO: validate
while (amountBackedUp < amount) {
backupOneCodePoint();
amountBackedUp++;
}
}

void backupOneCodePoint() {
if (Character.isLowSurrogate(buffer.charAt(cursor - 1))) {
cursor -= 2;
} else {
cursor--;
}
cursor -= amount;
}

int getPosition() {
Expand Down
22 changes: 11 additions & 11 deletions icu4j/main/core/src/main/java/com/ibm/icu/message2/MFParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ private MFDataModel.Message parseImpl() throws MFParseException {
cp = input.readCodePoint();
cp = input.peekChar();
if (cp == '{') { // `{{`, complex body without declarations
input.backupOneCodePoint(); // let complexBody deal with the wrapping {{ and }}
input.backup(1); // let complexBody deal with the wrapping {{ and }}
MFDataModel.Pattern pattern = getQuotedPattern();
skipOptionalWhitespaces();
result = new MFDataModel.PatternMessage(new ArrayList<>(), pattern);
} else { // placeholder
input.backupOneCodePoint(); // We want the '{' present, to detect the part as placeholder.
input.backup(1); // We want the '{' present, to detect the part as placeholder.
MFDataModel.Pattern pattern = getPattern();
result = new MFDataModel.PatternMessage(new ArrayList<>(), pattern);
}
Expand Down Expand Up @@ -129,7 +129,7 @@ private String getText() {
if (StringUtils.isContentChar(cp) || StringUtils.isWhitespace(cp)) {
result.appendCodePoint(cp);
} else {
input.backupOneCodePoint();
input.backup(1);
return result.toString();
}
}
Expand Down Expand Up @@ -413,13 +413,12 @@ private String getIdentifier() throws MFParseException {
if (namespace == null) {
return null;
}
int cp = input.readCodePoint();
int cp = input.peekChar();
if (cp == ':') { // the previous name was namespace
input.readCodePoint(); // Consume the ':'
String name = getName();
checkCondition(name != null, "Expected name after namespace '" + namespace + "'");
return namespace + ":" + name;
} else {
input.backupOneCodePoint();
}
return namespace;
}
Expand Down Expand Up @@ -490,7 +489,7 @@ private MFDataModel.Literal getLiteral() throws MFParseException {
MFDataModel.Literal ql = getQuotedLiteral();
return ql;
default: // unquoted
input.backupOneCodePoint();
input.backup(1);
MFDataModel.Literal unql = getUnQuotedLiteral();
return unql;
}
Expand Down Expand Up @@ -567,14 +566,14 @@ private int skipOptionalWhitespaces() {
private int skipWhitespaces() {
int skipCount = 0;
while (true) {
int cp = input.readCodePoint();
int cp = input.peekChar();
if (cp == EOF) {
return skipCount;
}
if (!StringUtils.isWhitespace(cp)) {
input.backupOneCodePoint();
return skipCount;
}
input.readCodePoint();
skipCount++;
}
}
Expand Down Expand Up @@ -798,13 +797,14 @@ private String getName() throws MFParseException {
}
result.appendCodePoint(cp);
while (true) {
cp = input.readCodePoint();
cp = input.peekCodePoint();
if (StringUtils.isNameChar(cp)) {
cp = input.readCodePoint();
result.appendCodePoint(cp);
} else if (cp == EOF) {
input.readCodePoint();
break;
} else {
input.backup(1);
break;
}
}
Expand Down

0 comments on commit eee547c

Please sign in to comment.