From 4b376f13642529939d9c59175fcc8e1527de7d55 Mon Sep 17 00:00:00 2001
From: Tim Chevalier <tjc@igalia.com>
Date: Tue, 30 Jul 2024 16:16:05 -0700
Subject: [PATCH] ICU-22898 MF2 parser bug fixes

ICU4C: Escape curly braces when serializing and normalizing
ICU4C: Escape '|' in patterns
ICU4C: When normalizing input, escape optionally-escaped characters in patterns
ICU4C/ICU4J: Allow trailing whitespace after a match
ICU4C: Fix parser to iterate over code points, not code units
Add tests with old reserved syntax as syntax-error tests
---
 icu4c/source/i18n/messageformat2_parser.cpp   | 620 +++++++++---------
 icu4c/source/i18n/messageformat2_parser.h     |  13 +-
 .../source/i18n/messageformat2_serializer.cpp |  29 +-
 .../intltest/messageformat2test_read_json.cpp |   2 +
 .../java/com/ibm/icu/message2/MFParser.java   |   6 +-
 .../ibm/icu/dev/test/message2/CoreTest.java   |   1 +
 .../message2/syntax-errors-diagnostics.json   |   9 +-
 testdata/message2/syntax-errors-reserved.json |  22 +
 testdata/message2/valid-tests.json            |  28 +
 9 files changed, 408 insertions(+), 322 deletions(-)
 create mode 100644 testdata/message2/syntax-errors-reserved.json

diff --git a/icu4c/source/i18n/messageformat2_parser.cpp b/icu4c/source/i18n/messageformat2_parser.cpp
index 462ee6cc22eb..ad11324a964d 100644
--- a/icu4c/source/i18n/messageformat2_parser.cpp
+++ b/icu4c/source/i18n/messageformat2_parser.cpp
@@ -24,21 +24,22 @@ using namespace data_model;
     The `ERROR()` macro sets a syntax error in the context
     and sets the offset in `parseError` to `index`. It does not alter control flow.
 */
-#define ERROR(parseError, errorCode, index)                                                             \
+#define ERROR(errorCode)                                                                                \
     if (!errors.hasSyntaxError()) {                                                                     \
         setParseError(parseError, index);                                                               \
         errors.addSyntaxError(errorCode);                                                               \
     }
 
-// Returns true iff `index` is a valid index for the string `source`
-static bool inBounds(const UnicodeString &source, uint32_t index) {
-    return static_cast<int32_t>(index) < source.length();
-}
+#define ERROR_AT(errorCode, i)                                                                          \
+    if (!errors.hasSyntaxError()) {                                                                     \
+        setParseError(parseError, i);                                                                   \
+        errors.addSyntaxError(errorCode);                                                               \
+    }
 
 // Increments the line number and updates the "characters seen before
-// current line" count in `parseError`, iff `source[index]` is a newline
+// current line" count in `parseError`, iff `peek()` is a newline
 void Parser::maybeAdvanceLine() {
-    if (source[index] == LF) {
+    if (peek() == LF) {
         parseError.line++;
         // add 1 to index to get the number of characters seen so far
         // (including the newline)
@@ -50,10 +51,15 @@ void Parser::maybeAdvanceLine() {
     Signals an error and returns either if `parseError` already denotes an
     error, or `index` is out of bounds for the string `source`
 */
-#define CHECK_BOUNDS(source, index, parseError, errorCode)                                              \
-    if (!inBounds(source, index)) {                                                                     \
-        ERROR(parseError, errorCode, index);                                                            \
-        return;                                                                                         \
+#define CHECK_BOUNDS(errorCode)                                                            \
+    if (!inBounds()) {                                                                     \
+        ERROR(errorCode);                                                                  \
+        return;                                                                            \
+    }
+#define CHECK_BOUNDS_1(errorCode)                                                          \
+    if (!inBounds(1)) {                                                                    \
+        ERROR_AT(errorCode, index + 1);                                                    \
+        return;                                                                            \
     }
 
 // -------------------------------------
@@ -203,6 +209,13 @@ static bool isQuotedChar(UChar32 c) {
         || c == RIGHT_CURLY_BRACE;
 }
 
+static bool isEscapableChar(UChar32 c) {
+    return c == PIPE
+        || c == BACKSLASH
+        || c == LEFT_CURLY_BRACE
+        || c == RIGHT_CURLY_BRACE;
+}
+
 // Returns true iff `c` can begin a `function` nonterminal
 static bool isFunctionStart(UChar32 c) {
     switch (c) {
@@ -247,15 +260,13 @@ static bool isKeyStart(UChar32 c) {
     return (c == ASTERISK || isLiteralStart(c));
 }
 
-inline bool isDeclarationStart(const UnicodeString& source, int32_t index) {
-    int32_t len = source.length();
-    int32_t next = index + 1;
-    return (source[index] == ID_LOCAL[0]
-            && next < len
-            && source[next] == ID_LOCAL[1])
-        || (source[index] == ID_INPUT[0]
-            && next < len
-            && source[next] == ID_INPUT[1]);
+bool Parser::isDeclarationStart() {
+    return (peek() == ID_LOCAL[0]
+            && inBounds(1)
+            && peek(1) == ID_LOCAL[1])
+        || (peek() == ID_INPUT[0]
+            && inBounds(1)
+            && peek(1) == ID_INPUT[1]);
 }
 
 // -------------------------------------
@@ -375,14 +386,14 @@ option, or the optional space before an attribute.
  Unless otherwise noted in a comment, all helper functions that take
     a `source` string, an `index` unsigned int, and an `errorCode` `UErrorCode`
     have the precondition:
-      `index` < `source.length()`
+      `index` < `len()`
     and the postcondition:
-      `U_FAILURE(errorCode)` || `index < `source.length()`
+      `U_FAILURE(errorCode)` || `index < `len()`
 */
 
 /*
   No pre, no post.
-  A message may end with whitespace, so `index` may equal `source.length()` on exit.
+  A message may end with whitespace, so `index` may equal `len()` on exit.
 */
 void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode) {
     bool sawWhitespace = false;
@@ -391,7 +402,7 @@ void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode)
     // or when we see a non-whitespace character.
     while (true) {
         // Check if all input has been consumed
-        if (!inBounds(source, index)) {
+        if (!inBounds()) {
             // If whitespace isn't required -- or if we saw it already --
             // then the caller is responsible for checking this case and
             // setting an error if necessary.
@@ -401,24 +412,24 @@ void Parser::parseWhitespaceMaybeRequired(bool required, UErrorCode& errorCode)
             }
             // Otherwise, whitespace is required; the end of the input has
             // been reached without whitespace. This is an error.
-            ERROR(parseError, errorCode, index);
+            ERROR(errorCode);
             return;
         }
 
         // Input remains; process the next character if it's whitespace,
         // exit the loop otherwise
-        if (isWhitespace(source[index])) {
+        if (isWhitespace(peek())) {
             sawWhitespace = true;
             // Increment line number in parse error if we consume a newline
             maybeAdvanceLine();
-            index++;
+            next();
         } else {
             break;
         }
     }
 
     if (!sawWhitespace && required) {
-        ERROR(parseError, errorCode, index);
+        ERROR(errorCode);
     }
 }
 
@@ -437,36 +448,36 @@ void Parser::parseOptionalWhitespace(UErrorCode& errorCode) {
     parseWhitespaceMaybeRequired(false, errorCode);
 }
 
-// Consumes a single character, signaling an error if `source[index]` != `c`
+// Consumes a single character, signaling an error if `peek()` != `c`
 // No postcondition -- a message can end with a '}' token
 void Parser::parseToken(UChar32 c, UErrorCode& errorCode) {
-    CHECK_BOUNDS(source, index, parseError, errorCode);
+    CHECK_BOUNDS(errorCode);
 
-    if (source[index] == c) {
-        index++;
+    if (peek() == c) {
+        next();
         normalizedInput += c;
         return;
     }
     // Next character didn't match -- error out
-    ERROR(parseError, errorCode, index);
+    ERROR(errorCode);
 }
 
 /*
    Consumes a fixed-length token, signaling an error if the token isn't a prefix of
-   the string beginning at `source[index]`
+   the string beginning at `peek()`
    No postcondition -- a message can end with a '}' token
 */
 void Parser::parseToken(const std::u16string_view& token, UErrorCode& errorCode) {
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
 
     int32_t tokenPos = 0;
     while (tokenPos < static_cast<int32_t>(token.length())) {
-        if (source[index] != token[tokenPos]) {
-            ERROR(parseError, errorCode, index);
+        if (peek() != token[tokenPos]) {
+            ERROR(errorCode);
             return;
         }
         normalizedInput += token[tokenPos];
-        index++;
+        next();
         tokenPos++;
     }
 }
@@ -481,11 +492,11 @@ void Parser::parseTokenWithWhitespace(const std::u16string_view& token, UErrorCo
     // No need for error check or bounds check before parseOptionalWhitespace
     parseOptionalWhitespace(errorCode);
     // Establish precondition
-    CHECK_BOUNDS(source, index, parseError, errorCode);
+    CHECK_BOUNDS(errorCode);
     parseToken(token, errorCode);
     parseOptionalWhitespace(errorCode);
     // Guarantee postcondition
-    CHECK_BOUNDS(source, index, parseError, errorCode);
+    CHECK_BOUNDS(errorCode);
 }
 
 /*
@@ -498,11 +509,11 @@ void Parser::parseTokenWithWhitespace(UChar32 c, UErrorCode& errorCode) {
     // No need for error check or bounds check before parseOptionalWhitespace(errorCode)
     parseOptionalWhitespace(errorCode);
     // Establish precondition
-    CHECK_BOUNDS(source, index, parseError, errorCode);
+    CHECK_BOUNDS(errorCode);
     parseToken(c, errorCode);
     parseOptionalWhitespace(errorCode);
     // Guarantee postcondition
-    CHECK_BOUNDS(source, index, parseError, errorCode);
+    CHECK_BOUNDS(errorCode);
 }
 
 /*
@@ -517,19 +528,20 @@ void Parser::parseTokenWithWhitespace(UChar32 c, UErrorCode& errorCode) {
 UnicodeString Parser::parseName(UErrorCode& errorCode) {
     UnicodeString name;
 
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
 
-    if (!isNameStart(source[index])) {
-        ERROR(parseError, errorCode, index);
+    if (!isNameStart(peek())) {
+        ERROR(errorCode);
         return name;
     }
 
-    while (isNameChar(source[index])) {
-        name += source[index];
-        normalizedInput += source[index];
-        index++;
-        if (!inBounds(source, index)) {
-            ERROR(parseError, errorCode, index);
+    while (isNameChar(peek())) {
+        UChar32 c = peek();
+        name += c;
+        normalizedInput += c;
+        next();
+        if (!inBounds()) {
+            ERROR(errorCode);
             break;
         }
     }
@@ -545,13 +557,13 @@ UnicodeString Parser::parseName(UErrorCode& errorCode) {
 VariableName Parser::parseVariableName(UErrorCode& errorCode) {
     VariableName result;
 
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
     // If the '$' is missing, we don't want a binding
     // for this variable to be created.
-    bool valid = source[index] == DOLLAR;
+    bool valid = peek() == DOLLAR;
     parseToken(DOLLAR, errorCode);
-    if (!inBounds(source, index)) {
-        ERROR(parseError, errorCode, index);
+    if (!inBounds()) {
+        ERROR(errorCode);
         return result;
     }
     UnicodeString varName = parseName(errorCode);
@@ -567,7 +579,7 @@ VariableName Parser::parseVariableName(UErrorCode& errorCode) {
   Corresponds to the `identifier` nonterminal in the grammar
 */
 UnicodeString Parser::parseIdentifier(UErrorCode& errorCode) {
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
 
     UnicodeString result;
     // The following is a hack to get around ambiguity in the grammar:
@@ -582,7 +594,7 @@ UnicodeString Parser::parseIdentifier(UErrorCode& errorCode) {
     // Parse namespace
     result += parseName(errorCode);
     int32_t firstColon = -1;
-    while (inBounds(source, index) && source[index] == COLON) {
+    while (inBounds() && peek() == COLON) {
         // Parse ':' separator
         if (firstColon == -1) {
             firstColon = index;
@@ -590,8 +602,8 @@ UnicodeString Parser::parseIdentifier(UErrorCode& errorCode) {
         parseToken(COLON, errorCode);
         result += COLON;
         // Check for message ending with something like "foo:"
-        if (!inBounds(source, index)) {
-            ERROR(parseError, errorCode, index);
+        if (!inBounds()) {
+            ERROR(errorCode);
         } else {
             // Parse name part
             result += parseName(errorCode);
@@ -603,7 +615,7 @@ UnicodeString Parser::parseIdentifier(UErrorCode& errorCode) {
     if (firstColon != -1) {
         for (int32_t i = firstColon + 1; i < result.length(); i++) {
             if (result[i] == COLON) {
-                ERROR(parseError, errorCode, i);
+                ERROR_AT(errorCode, i);
                 return {};
             }
         }
@@ -619,16 +631,16 @@ UnicodeString Parser::parseIdentifier(UErrorCode& errorCode) {
   Returns the function name.
 */
 FunctionName Parser::parseFunction(UErrorCode& errorCode) {
-    U_ASSERT(inBounds(source, index));
-    if (!isFunctionStart(source[index])) {
-        ERROR(parseError, errorCode, index);
+    U_ASSERT(inBounds());
+    if (!isFunctionStart(peek())) {
+        ERROR(errorCode);
         return FunctionName();
     }
 
-    normalizedInput += source[index];
-    index++; // Consume the function start character
-    if (!inBounds(source, index)) {
-        ERROR(parseError, errorCode, index);
+    normalizedInput += peek();
+    next(); // Consume the function start character
+    if (!inBounds()) {
+        ERROR(errorCode);
         return FunctionName();
     }
     return parseIdentifier(errorCode);
@@ -636,7 +648,7 @@ FunctionName Parser::parseFunction(UErrorCode& errorCode) {
 
 
 /*
-  Precondition: source[index] == BACKSLASH
+  Precondition: peek() == BACKSLASH
 
   Consume an escaped character.
   Corresponds to `escaped-char` in the grammar.
@@ -644,24 +656,24 @@ FunctionName Parser::parseFunction(UErrorCode& errorCode) {
   No postcondition (a message can end with an escaped char)
 */
 UnicodeString Parser::parseEscapeSequence(UErrorCode& errorCode) {
-    U_ASSERT(inBounds(source, index));
-    U_ASSERT(source[index] == BACKSLASH);
+    U_ASSERT(inBounds());
+    U_ASSERT(peek() == BACKSLASH);
     normalizedInput += BACKSLASH;
-    index++; // Skip the initial backslash
+    next(); // Skip the initial backslash
     UnicodeString str;
-    if (inBounds(source, index)) {
+    if (inBounds()) {
         // Expect a '{', '|' or '}'
-        switch (source[index]) {
+        switch (peek()) {
         case LEFT_CURLY_BRACE:
         case RIGHT_CURLY_BRACE:
         case PIPE:
         case BACKSLASH: {
             /* Append to the output string */
-            str += source[index];
+            str += peek();
             /* Update normalizedInput */
-            normalizedInput += source[index];
+            normalizedInput += peek();
             /* Consume the character */
-            index++;
+            next();
             return str;
         }
         default: {
@@ -671,7 +683,7 @@ UnicodeString Parser::parseEscapeSequence(UErrorCode& errorCode) {
         }
     }
    // If control reaches here, there was an error
-   ERROR(parseError, errorCode, index);
+   ERROR(errorCode);
    return str;
 }
 
@@ -686,26 +698,34 @@ Literal Parser::parseQuotedLiteral(UErrorCode& errorCode) {
     if (U_SUCCESS(errorCode)) {
         // Parse the opening '|'
         parseToken(PIPE, errorCode);
-        if (!inBounds(source, index)) {
-            ERROR(parseError, errorCode, index);
+        if (!inBounds()) {
+            ERROR(errorCode);
             error = true;
         } else {
             // Parse the contents
             bool done = false;
             while (!done) {
-                if (source[index] == BACKSLASH) {
+                if (peek() == BACKSLASH) {
                     contents += parseEscapeSequence(errorCode);
-                } else if (isQuotedChar(source[index])) {
-                    contents += source[index];
-                    normalizedInput += source[index];
-                    index++; // Consume this character
+                } else if (isQuotedChar(peek())) {
+                    contents += peek();
+                    // Handle cases like:
+                    // |}{| -- we want to escape everywhere that
+                    // can be escaped, to make round-trip checking
+                    // easier -- so this case normalizes to
+                    // |\}\{|
+                    if (isEscapableChar(peek())) {
+                        normalizedInput += BACKSLASH;
+                    }
+                    normalizedInput += peek();
+                    next(); // Consume this character
                     maybeAdvanceLine();
                 } else {
                     // Assume the sequence of literal characters ends here
                     done = true;
                 }
-                if (!inBounds(source, index)) {
-                    ERROR(parseError, errorCode, index);
+                if (!inBounds()) {
+                    ERROR(errorCode);
                     error = true;
                     break;
                 }
@@ -729,18 +749,18 @@ UnicodeString Parser::parseDigits(UErrorCode& errorCode) {
         return {};
     }
 
-    U_ASSERT(isDigit(source[index]));
+    U_ASSERT(isDigit(peek()));
 
     UnicodeString contents;
     do {
-        contents += source[index];
-        normalizedInput += source[index];
-        index++;
-        if (!inBounds(source, index)) {
-            ERROR(parseError, errorCode, index);
+        contents += peek();
+        normalizedInput += peek();
+        next();
+        if (!inBounds()) {
+            ERROR(errorCode);
             return {};
         }
-    } while (isDigit(source[index]));
+    } while (isDigit(peek()));
 
     return contents;
 }
@@ -753,7 +773,7 @@ Literal Parser::parseUnquotedLiteral(UErrorCode& errorCode) {
     }
 
     // unquoted -> name
-    if (isNameStart(source[index])) {
+    if (isNameStart(peek())) {
         return Literal(false, parseName(errorCode));
     }
 
@@ -762,75 +782,75 @@ Literal Parser::parseUnquotedLiteral(UErrorCode& errorCode) {
     UnicodeString contents;
 
     // Parse the sign
-    if (source[index] == HYPHEN) {
-        contents += source[index];
-        normalizedInput += source[index];
-        index++;
+    if (peek() == HYPHEN) {
+        contents += peek();
+        normalizedInput += peek();
+        next();
     }
-    if (!inBounds(source, index)) {
-        ERROR(parseError, errorCode, index);
+    if (!inBounds()) {
+        ERROR(errorCode);
         return {};
     }
 
     // Parse the integer part
-    if (source[index] == static_cast<UChar32>(0x0030) /* 0 */) {
-        contents += source[index];
-        normalizedInput += source[index];
-        index++;
-    } else if (isDigit(source[index])) {
+    if (peek() == ((UChar32)0x0030) /* 0 */) {
+        contents += peek();
+        normalizedInput += peek();
+        next();
+    } else if (isDigit(peek())) {
         contents += parseDigits(errorCode);
     } else {
         // Error -- nothing else can start a number literal
-        ERROR(parseError, errorCode, index);
+        ERROR(errorCode);
         return {};
     }
 
     // Parse the decimal point if present
-    if (source[index] == PERIOD) {
-        contents += source[index];
-        normalizedInput += source[index];
-        index++;
-        if (!inBounds(source, index)) {
-            ERROR(parseError, errorCode, index);
+    if (peek() == PERIOD) {
+        contents += peek();
+        normalizedInput += peek();
+        next();
+        if (!inBounds()) {
+            ERROR(errorCode);
             return {};
         }
         // Parse the fraction part
-        if (isDigit(source[index])) {
+        if (isDigit(peek())) {
             contents += parseDigits(errorCode);
         } else {
             // '.' not followed by digit is a parse error
-            ERROR(parseError, errorCode, index);
+            ERROR(errorCode);
             return {};
         }
     }
 
-    if (!inBounds(source, index)) {
-        ERROR(parseError, errorCode, index);
+    if (!inBounds()) {
+        ERROR(errorCode);
         return {};
     }
 
     // Parse the exponent part if present
-    if (source[index] == UPPERCASE_E || source[index] == LOWERCASE_E) {
-        contents += source[index];
-        normalizedInput += source[index];
-        index++;
-        if (!inBounds(source, index)) {
-            ERROR(parseError, errorCode, index);
+    if (peek() == UPPERCASE_E || peek() == LOWERCASE_E) {
+        contents += peek();
+        normalizedInput += peek();
+        next();
+        if (!inBounds()) {
+            ERROR(errorCode);
             return {};
         }
         // Parse sign if present
-        if (source[index] == PLUS || source[index] == HYPHEN) {
-            contents += source[index];
-            normalizedInput += source[index];
-            index++;
-            if (!inBounds(source, index)) {
-                ERROR(parseError, errorCode, index);
+        if (peek() == PLUS || peek() == HYPHEN) {
+            contents += peek();
+            normalizedInput += peek();
+            next();
+            if (!inBounds()) {
+                ERROR(errorCode);
                 return {};
             }
         }
         // Parse exponent digits
-        if (!isDigit(source[index])) {
-            ERROR(parseError, errorCode, index);
+        if (!isDigit(peek())) {
+            ERROR(errorCode);
             return {};
         }
         contents += parseDigits(errorCode);
@@ -844,17 +864,17 @@ Literal Parser::parseUnquotedLiteral(UErrorCode& errorCode) {
 */
 Literal Parser::parseLiteral(UErrorCode& errorCode) {
     Literal result;
-    if (!inBounds(source, index)) {
-        ERROR(parseError, errorCode, index);
+    if (!inBounds()) {
+        ERROR(errorCode);
     } else {
-        if (source[index] == PIPE) {
+        if (peek() == PIPE) {
             result = parseQuotedLiteral(errorCode);
         } else {
             result = parseUnquotedLiteral(errorCode);
         }
         // Guarantee postcondition
-        if (!inBounds(source, index)) {
-            ERROR(parseError, errorCode, index);
+        if (!inBounds()) {
+            ERROR(errorCode);
         }
     }
 
@@ -868,9 +888,9 @@ Literal Parser::parseLiteral(UErrorCode& errorCode) {
 */
 template<class T>
 void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode) {
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
 
-    U_ASSERT(source[index] == AT);
+    U_ASSERT(peek() == AT);
     // Consume the '@'
     parseToken(AT, errorCode);
 
@@ -884,13 +904,13 @@ void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode)
     parseOptionalWhitespace(errorCode);
 
     Operand rand;
-    if (source[index] == EQUALS) {
+    if (peek() == EQUALS) {
         // Parse '='
         parseTokenWithWhitespace(EQUALS, errorCode);
 
         UnicodeString rhsStr;
         // Parse RHS, which is either a literal or variable
-        switch (source[index]) {
+        switch (peek()) {
         case DOLLAR: {
             rand = Operand(parseVariableName(errorCode));
             break;
@@ -919,7 +939,7 @@ void Parser::parseAttribute(AttributeAdder<T>& attrAdder, UErrorCode& errorCode)
 */
 template<class T>
 void Parser::parseOption(OptionAdder<T>& addOption, UErrorCode& errorCode) {
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
 
     // Parse LHS
     UnicodeString lhs = parseIdentifier(errorCode);
@@ -930,7 +950,7 @@ void Parser::parseOption(OptionAdder<T>& addOption, UErrorCode& errorCode) {
     UnicodeString rhsStr;
     Operand rand;
     // Parse RHS, which is either a literal or variable
-    switch (source[index]) {
+    switch (peek()) {
     case DOLLAR: {
         rand = Operand(parseVariableName(errorCode));
         break;
@@ -967,7 +987,7 @@ void Parser::parseOption(OptionAdder<T>& addOption, UErrorCode& errorCode) {
 template <class T>
 void Parser::parseOptions(OptionAdder<T>& addOption, UErrorCode& errorCode) {
     // Early exit if out of bounds -- no more work is possible
-    CHECK_BOUNDS(source, index, parseError, errorCode);
+    CHECK_BOUNDS(errorCode);
 
 /*
 Arbitrary lookahead is required to parse option lists. To see why, consider
@@ -1015,7 +1035,7 @@ an option or an attribute.
         // If the next character is not whitespace, that means we've already
         // parsed the entire options list (which may have been empty) and there's
         // no trailing whitespace. In that case, exit.
-        if (!isWhitespace(source[index])) {
+        if (!isWhitespace(peek())) {
             break;
         }
         int32_t firstWhitespace = index;
@@ -1024,7 +1044,7 @@ an option or an attribute.
         // one whitespace character.
         parseRequiredWhitespace(errorCode);
         // Restore precondition
-        CHECK_BOUNDS(source, index, parseError, errorCode);
+        CHECK_BOUNDS(errorCode);
 
         // If a name character follows, then at least one more option remains
         // in the list.
@@ -1032,7 +1052,7 @@ an option or an attribute.
         // and can exit.
         // Note that exiting is sort of like backtracking: "(s option)" doesn't apply,
         // so we back out to [s].
-        if (!isNameStart(source[index])) {
+        if (!isNameStart(peek())) {
             // We've consumed all the options (meaning that either we consumed non-empty
             // whitespace, or consumed at least one option.)
             // Done.
@@ -1055,8 +1075,8 @@ template<class T>
 void Parser::parseAttributes(AttributeAdder<T>& attrAdder, UErrorCode& errorCode) {
 
     // Early exit if out of bounds -- no more work is possible
-    if (!inBounds(source, index)) {
-        ERROR(parseError, errorCode, index);
+    if (!inBounds()) {
+        ERROR(errorCode);
         return;
     }
 
@@ -1069,7 +1089,7 @@ Arbitrary lookahead is required to parse attribute lists, similarly to option li
         // If the next character is not whitespace, that means we've already
         // parsed the entire attributes list (which may have been empty) and there's
         // no trailing whitespace. In that case, exit.
-        if (!isWhitespace(source[index])) {
+        if (!isWhitespace(peek())) {
             break;
         }
 
@@ -1077,8 +1097,8 @@ Arbitrary lookahead is required to parse attribute lists, similarly to option li
         // one whitespace character.
         parseRequiredWhitespace(errorCode);
         // Restore precondition
-        if (!inBounds(source, index)) {
-            ERROR(parseError, errorCode, index);
+        if (!inBounds()) {
+            ERROR(errorCode);
             break;
         }
 
@@ -1088,7 +1108,7 @@ Arbitrary lookahead is required to parse attribute lists, similarly to option li
         // and can exit.
         // Note that exiting is sort of like backtracking: "(s attributes)" doesn't apply,
         // so we back out to [s].
-        if (source[index] != AT) {
+        if (peek() != AT) {
             // We've consumed all the attributes (meaning that either we consumed non-empty
             // whitespace, or consumed at least one attribute.)
             // Done.
@@ -1111,16 +1131,16 @@ void Parser::parseReservedChunk(Reserved::Builder& result, UErrorCode& status) {
 
     bool empty = true;
     UnicodeString chunk;
-    while(reservedChunkFollows(source[index])) {
+    while(reservedChunkFollows(peek())) {
         empty = false;
         // reserved-char
-        if (isReservedChar(source[index])) {
-            chunk += source[index];
-            normalizedInput += source[index];
+        if (isReservedChar(peek())) {
+            chunk += peek();
+            normalizedInput += peek();
             // consume the char
-            index++;
+            next();
             // Restore precondition
-            CHECK_BOUNDS(source, index, parseError, status);
+            CHECK_BOUNDS(status);
             continue;
         }
 
@@ -1129,11 +1149,11 @@ void Parser::parseReservedChunk(Reserved::Builder& result, UErrorCode& status) {
           chunk.setTo(u"", 0);
         }
 
-        if (source[index] == BACKSLASH) {
+        if (peek() == BACKSLASH) {
             // reserved-escape
             result.add(Literal(false, parseEscapeSequence(status)), status);
             chunk.setTo(u"", 0);
-        } else if (source[index] == PIPE || isUnquotedStart(source[index])) {
+        } else if (peek() == PIPE || isUnquotedStart(peek())) {
             result.add(parseLiteral(status), status);
         } else {
             // The reserved chunk ends here
@@ -1149,7 +1169,7 @@ void Parser::parseReservedChunk(Reserved::Builder& result, UErrorCode& status) {
     }
 
     if (empty) {
-        ERROR(parseError, status, index);
+        ERROR(status);
     }
 }
 
@@ -1166,23 +1186,23 @@ Reserved Parser::parseReserved(UErrorCode& status) {
         return {};
     }
 
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
 
     // Require a `reservedStart` character
-    if (!isReservedStart(source[index])) {
-        ERROR(parseError, status, index);
+    if (!isReservedStart(peek())) {
+        ERROR(status);
         return Reserved();
     }
 
     // Add the start char as a separate text chunk
-    UnicodeString firstCharString(source[index]);
+    UnicodeString firstCharString(peek());
     builder.add(Literal(false, firstCharString), status);
     if (U_FAILURE(status)) {
         return {};
     }
     // Consume reservedStart
-    normalizedInput += source[index];
-    index++;
+    normalizedInput += peek();
+    next();
     return parseReservedBody(builder, status);
 }
 
@@ -1226,42 +1246,42 @@ Reserved Parser::parseReservedBody(Reserved::Builder& builder, UErrorCode& statu
           (`parseExpression()` is responsible for checking that the next
           character is in fact a '}'.)
          */
-        if (!inBounds(source, index)) {
+        if (!inBounds()) {
             break;
         }
         int32_t numWhitespaceChars = 0;
         int32_t savedIndex = index;
-        if (isWhitespace(source[index])) {
+        if (isWhitespace(peek())) {
             parseOptionalWhitespace(status);
             numWhitespaceChars = index - savedIndex;
             // Restore precondition
-            if (!inBounds(source, index)) {
+            if (!inBounds()) {
                 break;
             }
         }
 
-        if (reservedChunkFollows(source[index])) {
+        if (reservedChunkFollows(peek())) {
             parseReservedChunk(builder, status);
 
             // Avoid looping infinitely
-            if (U_FAILURE(status) || !inBounds(source, index)) {
+            if (U_FAILURE(status) || !inBounds()) {
                 break;
             }
         } else {
             if (numWhitespaceChars > 0) {
-                if (source[index] == LEFT_CURLY_BRACE) {
+                if (peek() == LEFT_CURLY_BRACE) {
                     // Resolve even more ambiguity (space preceding another piece of
                     // a `reserved-body`, vs. space preceding an expression in `reserved-statement`
                     // "Backtrack"
                     index -= numWhitespaceChars;
                     break;
                 }
-                if (source[index] == RIGHT_CURLY_BRACE) {
+                if (peek() == RIGHT_CURLY_BRACE) {
                     // Not an error: just means there's no trailing whitespace
                     // after this `reserved`
                     break;
                 }
-                if (source[index] == AT) {
+                if (peek() == AT) {
                     // Not an error, but we have to "backtrack" due to the ambiguity
                     // between an `s` preceding another reserved chunk
                     // and an `s` preceding an attribute list
@@ -1270,7 +1290,7 @@ Reserved Parser::parseReservedBody(Reserved::Builder& builder, UErrorCode& statu
                 }
                 // Error: if there's whitespace, it must either be followed
                 // by a non-empty sequence or by '}'
-                ERROR(parseError, status, index);
+                ERROR(status);
                 break;
             }
             // If there was no whitespace, it's not an error,
@@ -1289,12 +1309,12 @@ Reserved Parser::parseReservedBody(Reserved::Builder& builder, UErrorCode& statu
   Returns an `Operator` representing this (a reserved is a parse error)
 */
 Operator Parser::parseAnnotation(UErrorCode& status) {
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
     Operator::Builder ratorBuilder(status);
     if (U_FAILURE(status)) {
         return {};
     }
-    if (isFunctionStart(source[index])) {
+    if (isFunctionStart(peek())) {
         // Consume the function name
         FunctionName func = parseFunction(status);
         ratorBuilder.setFunctionName(std::move(func));
@@ -1326,7 +1346,7 @@ void Parser::parseLiteralOrVariableWithAnnotation(bool isVariable,
                                                   UErrorCode& status) {
     CHECK_ERROR(status);
 
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
 
     Operand rand;
     if (isVariable) {
@@ -1360,7 +1380,7 @@ refactoring for the `expression` nonterminal that `parseOptions()` relies on. Se
 the comment in `parseOptions()` for details.
 */
 
-    if (isWhitespace(source[index])) {
+    if (isWhitespace(peek())) {
       int32_t firstWhitespace = index;
 
       // If the next character is whitespace, either [s annotation] or [s] applies
@@ -1369,10 +1389,10 @@ the comment in `parseOptions()` for details.
       // one does apply.
       parseOptionalWhitespace(status);
       // Restore precondition
-      CHECK_BOUNDS(source, index, parseError, status);
+      CHECK_BOUNDS(status);
 
       // This next check resolves the ambiguity between [s annotation] and [s]
-      bool isSAnnotation = isAnnotationStart(source[index]);
+      bool isSAnnotation = isAnnotationStart(peek());
 
       if (isSAnnotation) {
         normalizedInput += SPACE;
@@ -1433,7 +1453,7 @@ Expression Parser::parseExpression(UErrorCode& status) {
     }
 
     // Early return if out of input -- no more work is possible
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
 
     // Parse opening brace
     parseToken(LEFT_CURLY_BRACE, status);
@@ -1442,11 +1462,11 @@ Expression Parser::parseExpression(UErrorCode& status) {
 
     Expression::Builder exprBuilder(status);
     // Restore precondition
-    if (!inBounds(source, index)) {
+    if (!inBounds()) {
         exprFallback(exprBuilder);
     } else {
         // literal '|', variable '$' or annotation
-        switch (source[index]) {
+        switch (peek()) {
         case PIPE: {
             // Quoted literal
             parseLiteralOrVariableWithAnnotation(false, exprBuilder, status);
@@ -1458,15 +1478,15 @@ Expression Parser::parseExpression(UErrorCode& status) {
             break;
         }
         default: {
-            if (isAnnotationStart(source[index])) {
+            if (isAnnotationStart(peek())) {
                 Operator rator = parseAnnotation(status);
                 exprBuilder.setOperator(std::move(rator));
-            } else if (isUnquotedStart(source[index])) {
+            } else if (isUnquotedStart(peek())) {
                 // Unquoted literal
                 parseLiteralOrVariableWithAnnotation(false, exprBuilder, status);
             } else {
                 // Not a literal, variable or annotation -- error out
-                ERROR(parseError, status, index);
+                ERROR(status);
                 exprFallback(exprBuilder);
                 break;
             }
@@ -1490,8 +1510,8 @@ Expression Parser::parseExpression(UErrorCode& status) {
     U_ASSERT(U_SUCCESS(localStatus));
 
     // Check for end-of-input and missing '}'
-    if (!inBounds(source, index)) {
-        ERROR(parseError, status, index);
+    if (!inBounds()) {
+        ERROR(status);
     } else {
         // Otherwise, it's safe to check for the '}'
         parseToken(RIGHT_CURLY_BRACE, status);
@@ -1506,17 +1526,17 @@ Expression Parser::parseExpression(UErrorCode& status) {
 void Parser::parseLocalDeclaration(UErrorCode& status) {
     // End-of-input here would be an error; even empty
     // declarations must be followed by a body
-    CHECK_BOUNDS(source, index, parseError, status);
+    CHECK_BOUNDS(status);
 
     parseToken(ID_LOCAL, status);
     parseRequiredWhitespace(status);
 
     // Restore precondition
-    CHECK_BOUNDS(source, index, parseError, status);
+    CHECK_BOUNDS(status);
     VariableName lhs = parseVariableName(status);
     parseTokenWithWhitespace(EQUALS, status);
     // Restore precondition before calling parseExpression()
-    CHECK_BOUNDS(source, index, parseError, status);
+    CHECK_BOUNDS(status);
 
     Expression rhs = parseExpression(status);
 
@@ -1554,13 +1574,13 @@ void Parser::parseLocalDeclaration(UErrorCode& status) {
 void Parser::parseInputDeclaration(UErrorCode& status) {
     // End-of-input here would be an error; even empty
     // declarations must be followed by a body
-    CHECK_BOUNDS(source, index, parseError, status);
+    CHECK_BOUNDS(status);
 
     parseToken(ID_INPUT, status);
     parseOptionalWhitespace(status);
 
     // Restore precondition before calling parseExpression()
-    CHECK_BOUNDS(source, index, parseError, status);
+    CHECK_BOUNDS(status);
 
     // Save the index for error diagnostics
     int32_t exprIndex = index;
@@ -1570,7 +1590,7 @@ void Parser::parseInputDeclaration(UErrorCode& status) {
     if (!rhs.getOperand().isVariable()) {
         // This case is a syntax error; report it at the beginning
         // of the expression
-        ERROR(parseError, status, exprIndex);
+        ERROR_AT(status, exprIndex);
         return;
     }
 
@@ -1597,7 +1617,7 @@ void Parser::parseInputDeclaration(UErrorCode& status) {
   Parses a `reserved-statement` per the grammar
  */
 void Parser::parseUnsupportedStatement(UErrorCode& status) {
-    U_ASSERT(inBounds(source, index) && source[index] == PERIOD);
+    U_ASSERT(inBounds() && peek() == PERIOD);
 
     UnsupportedStatement::Builder builder(status);
     CHECK_ERROR(status);
@@ -1605,7 +1625,7 @@ void Parser::parseUnsupportedStatement(UErrorCode& status) {
     // Parse the keyword
     UnicodeString keyword(PERIOD);
     normalizedInput += UnicodeString(PERIOD);
-    index++;
+    next();
     keyword += parseName(status);
     builder.setKeyword(keyword);
 
@@ -1617,18 +1637,18 @@ void Parser::parseUnsupportedStatement(UErrorCode& status) {
     //   a '{')
     // * a '{'
 
-    CHECK_BOUNDS(source, index, parseError, status);
+    CHECK_BOUNDS(status);
 
-    if (source[index] != LEFT_CURLY_BRACE) {
-        if (!isWhitespace(source[index])) {
-            ERROR(parseError, status, index);
+    if (peek() != LEFT_CURLY_BRACE) {
+        if (!isWhitespace(peek())) {
+            ERROR(status);
             return;
         }
         // Expect a reserved-body start
         int32_t savedIndex = index;
         parseRequiredWhitespace(status);
-        CHECK_BOUNDS(source, index, parseError, status);
-        if (isReservedBodyStart(source[index])) {
+        CHECK_BOUNDS(status);
+        if (isReservedBodyStart(peek())) {
             // There is a reserved body
             Reserved::Builder r(status);
             builder.setBody(parseReservedBody(r, status));
@@ -1639,8 +1659,8 @@ void Parser::parseUnsupportedStatement(UErrorCode& status) {
         }
         // Otherwise, the next character must be a '{'
         // to open the required expression (or optional whitespace)
-        if (source[index] != LEFT_CURLY_BRACE && !isWhitespace(source[index])) {
-            ERROR(parseError, status, index);
+        if (peek() != LEFT_CURLY_BRACE && !isWhitespace(peek())) {
+            ERROR(status);
             return;
         }
     }
@@ -1650,12 +1670,12 @@ void Parser::parseUnsupportedStatement(UErrorCode& status) {
     // Need to look ahead to disambiguate a '{' beginning
     // an expression from one beginning with a quoted pattern
     int32_t expressionCount = 0;
-    while (source[index] == LEFT_CURLY_BRACE || isWhitespace(source[index])) {
+    while (peek() == LEFT_CURLY_BRACE || isWhitespace(peek())) {
         parseOptionalWhitespace(status);
 
-        bool nextIsLbrace = source[index] == LEFT_CURLY_BRACE;
-        bool nextIsQuotedPattern = nextIsLbrace && inBounds(source, index + 1)
-            && source[index + 1] == LEFT_CURLY_BRACE;
+        bool nextIsLbrace = peek() == LEFT_CURLY_BRACE;
+        bool nextIsQuotedPattern = nextIsLbrace && inBounds(1)
+            && peek(1) == LEFT_CURLY_BRACE;
         if (nextIsQuotedPattern) {
             break;
         }
@@ -1665,7 +1685,7 @@ void Parser::parseUnsupportedStatement(UErrorCode& status) {
     }
     if (expressionCount <= 0) {
         // At least one expression is required
-        ERROR(parseError, status, index);
+        ERROR(status);
         return;
     }
     dataModel.addUnsupportedStatement(builder.build(status), status);
@@ -1675,7 +1695,7 @@ void Parser::parseUnsupportedStatement(UErrorCode& status) {
 // and supported keywords
 bool Parser::nextIs(const std::u16string_view &keyword) const {
     for (int32_t i = 0; i < static_cast<int32_t>(keyword.length()); i++) {
-        if (!inBounds(source, index + i) || source[index + i] != keyword[i]) {
+        if (!inBounds(i) || peek(i) != keyword[i]) {
             return false;
         }
     }
@@ -1691,17 +1711,17 @@ bool Parser::nextIs(const std::u16string_view &keyword) const {
 void Parser::parseDeclarations(UErrorCode& status) {
     // End-of-input here would be an error; even empty
     // declarations must be followed by a body
-    CHECK_BOUNDS(source, index, parseError, status);
+    CHECK_BOUNDS(status);
 
-    while (source[index] == PERIOD) {
-        CHECK_BOUNDS(source, index + 1, parseError, status);
+    while (peek() == PERIOD) {
+        CHECK_BOUNDS_1(status);
         // Check for unsupported statements first
         // Lookahead is needed to disambiguate keyword from known keywords
         if (!nextIs(ID_MATCH) && !nextIs(ID_LOCAL) && !nextIs(ID_INPUT)) {
             parseUnsupportedStatement(status);
-        } else if (source[index + 1] == ID_LOCAL[1]) {
+        } else if (peek(1) == ID_LOCAL[1]) {
             parseLocalDeclaration(status);
-        } else if (source[index + 1] == ID_INPUT[1]) {
+        } else if (peek(1) == ID_INPUT[1]) {
             parseInputDeclaration(status);
         } else {
             // Done parsing declarations
@@ -1713,7 +1733,7 @@ void Parser::parseDeclarations(UErrorCode& status) {
 
         parseOptionalWhitespace(status);
         // Restore precondition
-        CHECK_BOUNDS(source, index, parseError, status);
+        CHECK_BOUNDS(status);
     }
 }
 
@@ -1725,13 +1745,17 @@ void Parser::parseDeclarations(UErrorCode& status) {
 */
 UnicodeString Parser::parseTextChar(UErrorCode& status) {
     UnicodeString str;
-    if (!inBounds(source, index) || !(isTextChar(source[index]))) {
+    if (!inBounds() || !(isTextChar(peek()))) {
         // Error -- text-char is expected here
-        ERROR(parseError, status, index);
+        ERROR(status);
     } else {
-        normalizedInput += source[index];
-        str += source[index];
-        index++;
+        // See comment in parseQuotedLiteral()
+        if (isEscapableChar(peek())) {
+            normalizedInput += BACKSLASH;
+        }
+        normalizedInput += peek();
+        str += peek();
+        next();
         maybeAdvanceLine();
     }
     return str;
@@ -1742,17 +1766,17 @@ UnicodeString Parser::parseTextChar(UErrorCode& status) {
   the `key` nonterminal in the grammar
 */
 Key Parser::parseKey(UErrorCode& status) {
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
 
     Key k; // wildcard by default
     // Literal | '*'
-    switch (source[index]) {
+    switch (peek()) {
     case ASTERISK: {
-        index++;
+        next();
         normalizedInput += ASTERISK;
         // Guarantee postcondition
-        if (!inBounds(source, index)) {
-            ERROR(parseError, status, index);
+        if (!inBounds()) {
+            ERROR(status);
             return k;
         }
         break;
@@ -1778,7 +1802,7 @@ SelectorKeys Parser::parseNonEmptyKeys(UErrorCode& status) {
         return result;
     }
 
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
 
 /*
 Arbitrary lookahead is required to parse key lists. To see why, consider
@@ -1810,31 +1834,31 @@ This is addressed using "backtracking" (similarly to `parseOptions()`).
     keysBuilder.add(parseKey(status), status);
 
     // Restore precondition
-    if (!inBounds(source, index)) {
-        ERROR(parseError, status, index);
+    if (!inBounds()) {
+        ERROR(status);
         return result;
     }
 
     // We've seen at least one whitespace-key pair, so now we can parse
     // *(s key) [s]
-    while (source[index] != LEFT_CURLY_BRACE || isWhitespace(source[index])) { // Try to recover from errors
-        bool wasWhitespace = isWhitespace(source[index]);
+    while (peek() != LEFT_CURLY_BRACE || isWhitespace(peek())) { // Try to recover from errors
+        bool wasWhitespace = isWhitespace(peek());
         parseRequiredWhitespace(status);
         if (!wasWhitespace) {
             // Avoid infinite loop when parsing something like:
             // when * @{!...
-            index++;
+            next();
         }
 
         // Restore precondition
-        if (!inBounds(source, index)) {
-            ERROR(parseError, status, index);
+        if (!inBounds()) {
+            ERROR(status);
             return result;
         }
 
         // At this point, it's ambiguous whether we are inside (s key) or [s].
         // This check resolves that ambiguity.
-        if (source[index] == LEFT_CURLY_BRACE) {
+        if (peek() == LEFT_CURLY_BRACE) {
             // A pattern follows, so what we just parsed was the optional
             // trailing whitespace. All the keys have been parsed.
 
@@ -1849,7 +1873,7 @@ This is addressed using "backtracking" (similarly to `parseOptions()`).
 }
 
 Pattern Parser::parseQuotedPattern(UErrorCode& status) {
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
 
     parseToken(LEFT_CURLY_BRACE, status);
     parseToken(LEFT_CURLY_BRACE, status);
@@ -1864,9 +1888,9 @@ Pattern Parser::parseQuotedPattern(UErrorCode& status) {
   No postcondition (a markup can end a message)
 */
 Markup Parser::parseMarkup(UErrorCode& status) {
-    U_ASSERT(inBounds(source, index + 1));
+    U_ASSERT(inBounds(1));
 
-    U_ASSERT(source[index] == LEFT_CURLY_BRACE);
+    U_ASSERT(peek() == LEFT_CURLY_BRACE);
 
     Markup::Builder builder(status);
     if (U_FAILURE(status)) {
@@ -1874,26 +1898,26 @@ Markup Parser::parseMarkup(UErrorCode& status) {
     }
 
     // Consume the '{'
-    index++;
+    next();
     normalizedInput += LEFT_CURLY_BRACE;
     parseOptionalWhitespace(status);
     bool closing = false;
-    switch (source[index]) {
+    switch (peek()) {
     case NUMBER_SIGN: {
         // Open or standalone; consume the '#'
-        normalizedInput += source[index];
-        index++;
+        normalizedInput += peek();
+        next();
         break;
     }
     case SLASH: {
         // Closing
-        normalizedInput += source[index];
+        normalizedInput += peek();
         closing = true;
-        index++;
+        next();
         break;
     }
     default: {
-        ERROR(parseError, status, index);
+        ERROR(status);
         return {};
     }
     }
@@ -1903,14 +1927,14 @@ Markup Parser::parseMarkup(UErrorCode& status) {
 
     // Parse the options, which must begin with a ' '
     // if present
-    if (inBounds(source, index) && isWhitespace(source[index])) {
+    if (inBounds() && isWhitespace(peek())) {
         OptionAdder<Markup::Builder> optionAdder(builder);
         parseOptions(optionAdder, status);
     }
 
     // Parse the attributes, which also must begin
     // with a ' '
-    if (inBounds(source, index) && isWhitespace(source[index])) {
+    if (inBounds() && isWhitespace(peek())) {
         AttributeAdder<Markup::Builder> attrAdder(builder);
         parseAttributes(attrAdder, status);
     }
@@ -1920,10 +1944,10 @@ Markup Parser::parseMarkup(UErrorCode& status) {
     bool standalone = false;
     // Check if this is a standalone or not
     if (!closing) {
-        if (inBounds(source, index) && source[index] == SLASH) {
+        if (inBounds() && peek() == SLASH) {
             standalone = true;
             normalizedInput += SLASH;
-            index++;
+            next();
         }
     }
 
@@ -1945,23 +1969,25 @@ Markup Parser::parseMarkup(UErrorCode& status) {
   No postcondition (a placeholder can end a message)
 */
 std::variant<Expression, Markup> Parser::parsePlaceholder(UErrorCode& status) {
-    U_ASSERT(source[index] == LEFT_CURLY_BRACE);
+    U_ASSERT(peek() == LEFT_CURLY_BRACE);
 
-    if (!inBounds(source, index)) {
-        ERROR(parseError, status, index);
+    if (!inBounds()) {
+        ERROR(status);
         return exprFallback(status);
     }
 
-    // Need to look ahead arbitrarily since can appear before the '{' and '#'
+    // Need to look ahead arbitrarily since whitespace
+    // can appear before the '{' and '#'
     // in markup
-    int32_t tempIndex = index + 1;
+    int32_t tempIndex = 1;
     bool isMarkup = false;
-    while (inBounds(source, tempIndex)) {
-        if (source[tempIndex] == NUMBER_SIGN || source[tempIndex] == SLASH) {
+    while (inBounds(1)) {
+        UChar32 c = peek(tempIndex);
+        if (c == NUMBER_SIGN || c == SLASH) {
             isMarkup = true;
             break;
         }
-        if (!isWhitespace(source[tempIndex])){
+        if (!isWhitespace(c)){
             break;
         }
         tempIndex++;
@@ -1975,7 +2001,7 @@ std::variant<Expression, Markup> Parser::parsePlaceholder(UErrorCode& status) {
 
 /*
   Consume a `simple-message`, matching the nonterminal in the grammar
-  Postcondition: `index == source.length()` or U_FAILURE(status);
+  Postcondition: `index == len()` or U_FAILURE(status);
   for a syntactically correct message, this will consume the entire input
 */
 Pattern Parser::parseSimpleMessage(UErrorCode& status) {
@@ -1983,8 +2009,8 @@ Pattern Parser::parseSimpleMessage(UErrorCode& status) {
 
     if (U_SUCCESS(status)) {
         Expression expression;
-        while (inBounds(source, index)) {
-            switch (source[index]) {
+        while (inBounds()) {
+            switch (peek()) {
             case LEFT_CURLY_BRACE: {
                 // Must be placeholder
                 std::variant<Expression, Markup> piece = parsePlaceholder(status);
@@ -2012,7 +2038,7 @@ Pattern Parser::parseSimpleMessage(UErrorCode& status) {
                 break;
             }
             }
-            if (source[index] == RIGHT_CURLY_BRACE) {
+            if (peek() == RIGHT_CURLY_BRACE) {
                 // End of quoted pattern
                 break;
             }
@@ -2030,13 +2056,13 @@ Pattern Parser::parseSimpleMessage(UErrorCode& status) {
   Consume a `selectors` (matching the nonterminal in the grammar),
   followed by a non-empty sequence of `variant`s (matching the nonterminal
   in the grammar) preceded by whitespace
-  No postcondition (on return, `index` might equal `source.length()` with no syntax error
+  No postcondition (on return, `index` might equal `len()` with no syntax error
   because a message can end with a variant)
 */
 void Parser::parseSelectors(UErrorCode& status) {
     CHECK_ERROR(status);
 
-    U_ASSERT(inBounds(source, index));
+    U_ASSERT(inBounds());
 
     parseToken(ID_MATCH, status);
 
@@ -2044,11 +2070,11 @@ void Parser::parseSelectors(UErrorCode& status) {
     // Parse selectors
     // "Backtracking" is required here. It's not clear if whitespace is
     // (`[s]` selector) or (`[s]` variant)
-    while (isWhitespace(source[index]) || source[index] == LEFT_CURLY_BRACE) {
+    while (isWhitespace(peek()) || peek() == LEFT_CURLY_BRACE) {
         parseOptionalWhitespace(status);
         // Restore precondition
-        CHECK_BOUNDS(source, index, parseError, status);
-        if (source[index] != LEFT_CURLY_BRACE) {
+        CHECK_BOUNDS(status);
+        if (peek() != LEFT_CURLY_BRACE) {
             // This is not necessarily an error, but rather,
             // means the whitespace we parsed was the optional
             // whitespace preceding the first variant, not the
@@ -2065,29 +2091,21 @@ void Parser::parseSelectors(UErrorCode& status) {
 
     // At least one selector is required
     if (empty) {
-        ERROR(parseError, status, index);
+        ERROR(status);
         return;
     }
 
     #define CHECK_END_OF_INPUT                     \
-        if (((int32_t)index) >= source.length()) { \
+        if (!inBounds()) {                         \
             break;                                 \
         }                                          \
 
     // Parse variants
-    while (isWhitespace(source[index]) || isKeyStart(source[index])) {
-        if (isWhitespace(source[index])) {
-            int32_t whitespaceStart = index;
-            parseOptionalWhitespace(status);
-            // Restore the precondition.
-            // Error out if we reached the end of input. The message
-            // cannot end with trailing whitespace if there are variants.
-            if (!inBounds(source, index)) {
-                // Use index of first whitespace for error message
-                index = whitespaceStart;
-                ERROR(parseError, status, index);
-                return;
-            }
+    while (isWhitespace(peek()) || isKeyStart(peek())) {
+        // Trailing whitespace is allowed
+        parseOptionalWhitespace(status);
+        if (!inBounds()) {
+            return;
         }
 
         // At least one key is required
@@ -2100,7 +2118,7 @@ void Parser::parseSelectors(UErrorCode& status) {
 
         // Restore precondition before calling parsePattern()
         // (which must return a non-null value)
-        CHECK_BOUNDS(source, index, parseError, status);
+        CHECK_BOUNDS(status);
         Pattern rhs = parseQuotedPattern(status);
 
         dataModel.addVariant(std::move(keyList), std::move(rhs), status);
@@ -2117,7 +2135,7 @@ void Parser::parseSelectors(UErrorCode& status) {
 
 /*
   Consume a `body` (matching the nonterminal in the grammar),
-  No postcondition (on return, `index` might equal `source.length()` with no syntax error,
+  No postcondition (on return, `index` might equal `len()` with no syntax error,
   because a message can end with a body (trailing whitespace is optional)
 */
 
@@ -2136,8 +2154,9 @@ void Parser::errorPattern(UErrorCode& status) {
       whether this is the intent behind the spec
      */
     UnicodeString partStr(LEFT_CURLY_BRACE);
-    while (inBounds(source, index)) {
-        partStr += source[index++];
+    while (inBounds()) {
+        partStr += peek();
+        next();
     }
     // Add curly braces around the entire output (same comment as above)
     partStr += RIGHT_CURLY_BRACE;
@@ -2149,13 +2168,13 @@ void Parser::parseBody(UErrorCode& status) {
     CHECK_ERROR(status);
 
     // Out-of-input is a syntax warning
-    if (!inBounds(source, index)) {
+    if (!inBounds()) {
         errorPattern(status);
         return;
     }
 
     // Body must be either a pattern or selectors
-    switch (source[index]) {
+    switch (peek()) {
     case LEFT_CURLY_BRACE: {
         // Pattern
         dataModel.setPattern(parseQuotedPattern(status));
@@ -2167,7 +2186,7 @@ void Parser::parseBody(UErrorCode& status) {
         return;
     }
     default: {
-        ERROR(parseError, status, index);
+        ERROR(status);
         errorPattern(status);
         return;
     }
@@ -2183,14 +2202,17 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
     bool complex = false;
     // First, "look ahead" to determine if this is a simple or complex
     // message. To do that, check the first non-whitespace character.
-    while (inBounds(source, index) && isWhitespace(source[index])) {
-        index++;
-    }
-    if (inBounds(source, index)) {
-        if (source[index] == PERIOD
-            || (index < static_cast<uint32_t>(source.length()) + 1
-                && source[index] == LEFT_CURLY_BRACE
-                && source[index + 1] == LEFT_CURLY_BRACE)) {
+    while (inBounds(index) && isWhitespace(peek())) {
+        next();
+    }
+
+    // Message can be empty, so we need to only look ahead
+    // if we know it's non-empty
+    if (inBounds()) {
+        if (peek() == PERIOD
+            || (inBounds(1)
+                && peek() == LEFT_CURLY_BRACE
+                && peek(1) == LEFT_CURLY_BRACE)) {
             complex = true;
         }
     }
@@ -2217,8 +2239,8 @@ void Parser::parse(UParseError &parseErrorResult, UErrorCode& status) {
     CHECK_ERROR(status);
 
     // There are no errors; finally, check that the entire input was consumed
-    if (static_cast<int32_t>(index) != source.length()) {
-      ERROR(parseError, status, index);
+    if (!allConsumed()) {
+        ERROR(status);
     }
 
     // Finally, copy the relevant fields of the internal `MessageParseError`
diff --git a/icu4c/source/i18n/messageformat2_parser.h b/icu4c/source/i18n/messageformat2_parser.h
index 9367c0e981dd..4b35760cb118 100644
--- a/icu4c/source/i18n/messageformat2_parser.h
+++ b/icu4c/source/i18n/messageformat2_parser.h
@@ -140,10 +140,21 @@ namespace message2 {
 	SelectorKeys parseNonEmptyKeys(UErrorCode&);
 	void errorPattern(UErrorCode& status);
 	Pattern parseQuotedPattern(UErrorCode&);
+        bool isDeclarationStart();
+
+        UChar32 peek() const { return source.char32At(index) ; }
+        UChar32 peek(uint32_t i) const {
+            return source.char32At(source.moveIndex32(index, i));
+        }
+        void next() { index = source.moveIndex32(index, 1); }
+
+        bool inBounds() const { return (int32_t) index < source.length(); }
+        bool inBounds(uint32_t i) const { return source.moveIndex32(index, i) < source.length(); }
+        bool allConsumed() const { return (int32_t) index == source.length(); }
 
 	// The input string
 	const UnicodeString &source;
-	// The current position within the input string
+	// The current position within the input string -- counting in UChar32
 	uint32_t index;
 	// Represents the current line (and when an error is indicated),
 	// character offset within the line of the parse error
diff --git a/icu4c/source/i18n/messageformat2_serializer.cpp b/icu4c/source/i18n/messageformat2_serializer.cpp
index 82fa65859419..228cf34181cd 100644
--- a/icu4c/source/i18n/messageformat2_serializer.cpp
+++ b/icu4c/source/i18n/messageformat2_serializer.cpp
@@ -42,24 +42,26 @@ void Serializer::emit(const std::u16string_view& token) {
 void Serializer::emit(const Literal& l) {
     if (l.isQuoted()) {
       emit(PIPE);
-      const UnicodeString& contents = l.unquoted();
-      for (int32_t i = 0; i < contents.length(); i++) {
-        // Re-escape any PIPE or BACKSLASH characters
+    }
+    const UnicodeString& contents = l.unquoted();
+    for (int32_t i = 0; ((int32_t) i) < contents.length(); i++) {
+        // Re-escape any escaped-char characters
         switch(contents[i]) {
         case BACKSLASH:
-        case PIPE: {
-          emit(BACKSLASH);
-          break;
+        case PIPE:
+        case LEFT_CURLY_BRACE:
+        case RIGHT_CURLY_BRACE: {
+            emit(BACKSLASH);
+            break;
         }
         default: {
-          break;
+            break;
         }
         }
         emit(contents[i]);
-      }
-      emit(PIPE);
-    } else {
-      emit(l.unquoted());
+    }
+    if (l.isQuoted()) {
+        emit(PIPE);
     }
 }
 
@@ -194,9 +196,10 @@ void Serializer::emit(const PatternPart& part) {
     if (part.isText()) {
         // Raw text
         const UnicodeString& text = part.asText();
-        // Re-escape '{'/'}'/'\'
-        for (int32_t i = 0; i < text.length(); i++) {
+        // Re-escape '{'/'}'/'\''|'
+        for (int32_t i = 0; ((int32_t) i) < text.length(); i++) {
           switch(text[i]) {
+          case PIPE:
           case BACKSLASH:
           case LEFT_CURLY_BRACE:
           case RIGHT_CURLY_BRACE: {
diff --git a/icu4c/source/test/intltest/messageformat2test_read_json.cpp b/icu4c/source/test/intltest/messageformat2test_read_json.cpp
index 0c65764e7fb5..53d25ebe715c 100644
--- a/icu4c/source/test/intltest/messageformat2test_read_json.cpp
+++ b/icu4c/source/test/intltest/messageformat2test_read_json.cpp
@@ -303,6 +303,8 @@ void TestMessageFormat2::jsonTestsFromFiles(IcuTestErrorCode& errorCode) {
     // Do tests for reserved statements/expressions
     runTestsFromJsonFile(*this, "spec/unsupported-expressions.json", errorCode);
     runTestsFromJsonFile(*this, "spec/unsupported-statements.json", errorCode);
+    // Uncomment when reserved syntax is removed
+    // runTestsFromJsonFile(*this, "syntax-errors-reserved.json", errorCode);
 
     // Do valid spec tests
     runTestsFromJsonFile(*this, "spec/syntax.json", errorCode);
diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/message2/MFParser.java b/icu4j/main/core/src/main/java/com/ibm/icu/message2/MFParser.java
index 8ba50d055c37..61d18ffde8a4 100644
--- a/icu4j/main/core/src/main/java/com/ibm/icu/message2/MFParser.java
+++ b/icu4j/main/core/src/main/java/com/ibm/icu/message2/MFParser.java
@@ -613,10 +613,8 @@ private MFDataModel.Variant getVariant() throws MFParseException {
             }
             keys.add(key);
         }
-        // Only want to skip whitespace if we parsed at least one key
-        if (!keys.isEmpty()) {
-            skipOptionalWhitespaces();
-        }
+        // Trailing whitespace is allowed after the message
+        skipOptionalWhitespaces();
         if (input.atEnd()) {
             checkCondition(
                     keys.isEmpty(), "After selector keys it is mandatory to have a pattern.");
diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/message2/CoreTest.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/message2/CoreTest.java
index a0ec7c2fe3d1..dd2698087be8 100644
--- a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/message2/CoreTest.java
+++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/message2/CoreTest.java
@@ -40,6 +40,7 @@ public class CoreTest extends CoreTestFmwk {
                                                 "syntax-errors-diagnostics.json",
                                                 "syntax-errors-diagnostics-multiline.json",
                                                 "syntax-errors-end-of-input.json",
+                                                "syntax-errors-reserved.json",
                                                 "tricky-declarations.json",
                                                 "valid-tests.json"};
 
diff --git a/testdata/message2/syntax-errors-diagnostics.json b/testdata/message2/syntax-errors-diagnostics.json
index 5b68ae80f31b..79a29a027f9b 100644
--- a/testdata/message2/syntax-errors-diagnostics.json
+++ b/testdata/message2/syntax-errors-diagnostics.json
@@ -136,11 +136,6 @@
         "char": 28,
         "comment": "Trailing characters that are not whitespace"
     },
-    {
-        "src": ".match {$foo :string} {$bar :string}  one * {{one}}  * * {{other}}   ",
-        "char": 66,
-        "comment": "Trailing whitespace at end of message should not be accepted either"
-    },
     {
         "src": "empty { }",
         "char": 8,
@@ -343,6 +338,10 @@
         "src":  ".match {1} {{_}}",
         "char": 12,
         "comment": "Disambiguating a wrong .match from an unsupported statement"
+    },
+    {
+        "src": "{{{/p o4.􍅲 = 1}}}",
+        "char": 9
     }
   ]
 }
diff --git a/testdata/message2/syntax-errors-reserved.json b/testdata/message2/syntax-errors-reserved.json
new file mode 100644
index 000000000000..e82421485d23
--- /dev/null
+++ b/testdata/message2/syntax-errors-reserved.json
@@ -0,0 +1,22 @@
+{
+  "scenario": "Reserved and private annotations",
+  "description": "Tests for old unsupported expression (reserved/private) syntax -- these are now syntax errors",
+  "defaultTestProperties": {
+    "locale": "en-US",
+    "expErrors": [
+        {
+        "type": "syntax-error"
+      }
+    ]
+  },
+  "tests": [
+      { "src" : "\\\\{   ?   󗟋   𫓜|@}   |}" },
+      { "src" : "\\\\{   ?   󗟋   𫓜|@}   |   \\}   @𠟅򑚎𥪙𽧫=||   @򒘒򳷦㥞򉊷=$򸚶񽱆񅗽񤕞   @𰺱:񎫛񢶛򶈎񄮒}" },
+      { "src" : "{  $iFN  ^  @USh  =$u  @l}" },
+      { "src" : ".local $dS ={ $p4 ^ |.| \\\\ @g:FV = $kd}{{@}}" },
+      { "src" : ".D. \\\\ ||{1}{{}} " },
+      { "src" : ".cIT  ||{|@|  %  \\}  }  {  *󔜫񥘃󸇀  }{{}}" },
+      { "src" : ".򱋿󠆿 . |@| {$󛒁񓝖 & \\{ . @𯼼򋼡= $򵀀񓞈}{{\\\\}}" }
+  ]
+}
+
diff --git a/testdata/message2/valid-tests.json b/testdata/message2/valid-tests.json
index 5f1292e00896..4b1ce8d67fb6 100644
--- a/testdata/message2/valid-tests.json
+++ b/testdata/message2/valid-tests.json
@@ -123,6 +123,34 @@
       "comment": "message -> complex-message -> *(declaration [s]) complex-body -> declaration complex-body -> input-declaration complex-body -> input variable-expression complex-body",
       "src": ".input{$x}{{}}",
       "exp": ""
+    },
+    {
+      "comment": "Test that escaped characters are re-escaped properly when serializing literals",
+      "src": "{|\\}|}",
+      "exp": "}"
+    },
+    {
+      "comment": "Test that escaped characters are re-escaped properly when serializing patterns",
+      "src": "\\|",
+      "exp": "|"
+    },
+    {
+      "comment": "Test that parser and serializer treat optionally-escaped characters consistently",
+      "src" : "{{{1}|}}",
+      "exp": "1|"
+    },
+    {
+      "comment": "Trailing whitespace after match is valid",
+      "src": ".match {1 :string} * {{}} ",
+      "exp": ""
+    },
+    {
+      "src" : "𴢸",
+      "exp" : "𴢸"
+    },
+    {
+      "src" : "{{􍅲}}",
+      "exp" : "􍅲"
     }
   ]
 }