Skip to content

Commit

Permalink
Use IntPredicate and inline end of line parsing
Browse files Browse the repository at this point in the history
This commit makes minor performance improvements to parsing so that
we can use consumeWhile with an IntPredicate rather than needing to
box a char with a Predicate<Character>. To accommodate this, I
deprecated `consumeUntilNoLongerMatches(Predicate<Character>)` in favor
of `consumeWhile(IntPredicate)`. I also added overloads to ParserUtils
to accept int in addition to char, matching how other classes in the
JDK work like Character.isDigit.
  • Loading branch information
mtdowling committed Apr 19, 2023
1 parent 490452b commit 94bf11b
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ private IdlToken tokenizeNewline() {
}

// Tokenizes a run of spaces and tabs as a single SPACE token.
private IdlToken tokenizeSpace() {
// NOTE(review): this view appears to show the removed and the added diff line back to back;
// the consumeUntilNoLongerMatches call is presumably the pre-change form -- confirm against the real file.
parser.consumeUntilNoLongerMatches(c -> c == ' ' || c == '\t');
parser.consumeWhile(c -> c == ' ' || c == '\t');
// The token ends wherever the parser stopped consuming.
currentTokenEnd = parser.position();
// Stores SPACE as the current token type and returns it in one expression.
return currentTokenType = IdlToken.SPACE;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ public static String parseNumber(SimpleParser parser) {
}
}

parser.consumeUntilNoLongerMatches(ParserUtils::isDigit);
parser.consumeWhile(ParserUtils::isDigit);

// Consume decimals.
char peek = parser.peek();
if (peek == '.') {
parser.skip();
if (parser.consumeUntilNoLongerMatches(ParserUtils::isDigit) == 0) {
if (parser.consumeWhile(ParserUtils::isDigit) == 0) {
throw parser.syntax(createInvalidString(parser, startPosition, "'.' must be followed by a digit"));
}
}
Expand All @@ -65,7 +65,7 @@ public static String parseNumber(SimpleParser parser) {
if (peek == '+' || peek == '-') {
parser.skip();
}
if (parser.consumeUntilNoLongerMatches(ParserUtils::isDigit) == 0) {
if (parser.consumeWhile(ParserUtils::isDigit) == 0) {
throw parser.syntax(
createInvalidString(parser, startPosition, "'e', '+', and '-' must be followed by a digit"));
}
Expand Down Expand Up @@ -158,7 +158,7 @@ public static void consumeIdentifier(SimpleParser parser) {
// Parse identifier_start
char c = parser.peek();
if (c == '_') {
parser.consumeUntilNoLongerMatches(next -> next == '_');
parser.consumeWhile(next -> next == '_');
if (!ParserUtils.isValidIdentifierCharacter(parser.peek())) {
throw invalidIdentifier(parser);
}
Expand All @@ -170,7 +170,7 @@ public static void consumeIdentifier(SimpleParser parser) {
parser.skip();

// Parse identifier_chars
parser.consumeUntilNoLongerMatches(ParserUtils::isValidIdentifierCharacter);
parser.consumeWhile(ParserUtils::isValidIdentifierCharacter);
}

private static RuntimeException invalidIdentifier(SimpleParser parser) {
Expand All @@ -185,6 +185,16 @@ private static RuntimeException invalidIdentifier(SimpleParser parser) {
* @return Returns true if the character is allowed in an identifier.
*/
public static boolean isValidIdentifierCharacter(char c) {
    // Widen explicitly so the call binds to the int overload, which holds the actual check.
    final int value = c;
    return isValidIdentifierCharacter(value);
}

/**
 * Tests whether the given character value may appear inside an identifier,
 * i.e. it is accepted by {@link #isIdentifierStart(int)} or {@link #isDigit(int)}.
 *
 * @param c Character value to check.
 * @return Returns true if the character is allowed in an identifier.
 */
public static boolean isValidIdentifierCharacter(int c) {
    if (isIdentifierStart(c)) {
        return true;
    }
    return isDigit(c);
}

Expand All @@ -195,6 +205,16 @@ public static boolean isValidIdentifierCharacter(char c) {
* @return Returns true if the character can start an identifier.
*/
public static boolean isIdentifierStart(char c) {
    // char-to-int widening is lossless; the int overload performs the test.
    final int value = c;
    return isIdentifierStart(value);
}

/**
 * Tests whether the given character value may begin an identifier: an
 * underscore, or anything accepted by {@link #isAlphabetic(int)}.
 *
 * @param c Character value to check.
 * @return Returns true if the character can start an identifier.
 */
public static boolean isIdentifierStart(int c) {
    if (c == '_') {
        return true;
    }
    return isAlphabetic(c);
}

Expand All @@ -205,6 +225,16 @@ public static boolean isIdentifierStart(char c) {
* @return Returns true if the character is a digit.
*/
public static boolean isDigit(char c) {
    // Forward to the int overload; the widening conversion keeps the value unchanged.
    final int value = c;
    return isDigit(value);
}

/**
 * Tests whether the given character value is one of the ASCII digits
 * {@code '0'} through {@code '9'}.
 *
 * @param c Character value to check.
 * @return Returns true if the character is a digit.
 */
public static boolean isDigit(int c) {
    // Reject anything outside the closed range ['0', '9'].
    return !(c < '0' || c > '9');
}

Expand All @@ -216,6 +246,17 @@ public static boolean isDigit(char c) {
* @return Returns true if the character is an alphabetic character.
*/
public static boolean isAlphabetic(char c) {
    // Widen to int and reuse the int overload's range checks.
    final int value = c;
    return isAlphabetic(value);
}

/**
 * Tests whether the given character value is an ASCII letter, A-Z or a-z.
 * This is a stricter version of {@link Character#isAlphabetic}.
 *
 * @param c Character value to check.
 * @return Returns true if the character is an alphabetic character.
 */
public static boolean isAlphabetic(int c) {
    final boolean upperCase = 'A' <= c && c <= 'Z';
    final boolean lowerCase = 'a' <= c && c <= 'z';
    return upperCase || lowerCase;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ private MessageTemplateParser(String expression) {

MessageTemplate parse() {
while (!eof()) {
consumeUntilNoLongerMatches(c -> c != '@');
consumeWhile(c -> c != '@');
// '@' followed by '@' is an escaped '@', so keep parsing
// the marked literal if that's the case.
if (peek(1) == '@') {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ private void parseBracedArgument(int pendingTextStart) {
if (parser.peek() == '@') {
parser.skip();
int start = parser.position();
parser.consumeUntilNoLongerMatches(this::isNameCharacter);
parser.consumeWhile(this::isNameCharacter);
String sectionName = parser.sliceFrom(start);
ensureNameIsValid(sectionName);
operation = Operation.inlineSection(sectionName, operation);
Expand Down Expand Up @@ -518,7 +518,7 @@ boolean parseStripTrailingWhitespace() {
}

void skipTrailingWhitespaceInParser() {
// Advances the parser past every character that Character.isWhitespace accepts.
// NOTE(review): the two calls look like the removed and added lines of the same diff hunk;
// presumably only consumeWhile remains in the real file -- confirm.
parser.consumeUntilNoLongerMatches(Character::isWhitespace);
parser.consumeWhile(Character::isWhitespace);
}

private boolean isAllLeadingWhitespaceOnLine(int startPosition, int startColumn) {
Expand Down Expand Up @@ -549,13 +549,13 @@ private void pushBlock(int pendingTextStart, int startPosition, int startLine, i
parser.expect('s');
parser.sp();
int startPos = parser.position();
parser.consumeUntilNoLongerMatches(this::isNameCharacter);
parser.consumeWhile(this::isNameCharacter);
keyPrefix = parser.sliceFrom(startPos);
ensureNameIsValid(keyPrefix);
parser.expect(',');
parser.sp();
startPos = parser.position();
parser.consumeUntilNoLongerMatches(this::isNameCharacter);
parser.consumeWhile(this::isNameCharacter);
value = parser.sliceFrom(startPos);
ensureNameIsValid(value);
}
Expand Down Expand Up @@ -646,7 +646,7 @@ private Operation parseNormalArgument() {

private String parseArgumentName() {
int start = parser.position();
parser.consumeUntilNoLongerMatches(this::isNameCharacter);
parser.consumeWhile(this::isNameCharacter);
String name = parser.sliceFrom(start);
ensureNameIsValid(name);
return name;
Expand Down Expand Up @@ -694,7 +694,7 @@ private Function<AbstractCodeWriter<?>, Object> parsePositionalArgumentGetter()

relativeIndex = -1;
int startPosition = parser.position();
parser.consumeUntilNoLongerMatches(Character::isDigit);
parser.consumeWhile(Character::isDigit);
int index = Integer.parseInt(parser.sliceFrom(startPosition)) - 1;

if (index < 0 || index >= arguments.length) {
Expand All @@ -713,7 +713,7 @@ private void ensureNameIsValid(String name) {
}
}

private boolean isNameCharacter(char c) {
private boolean isNameCharacter(int c) {
return (c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| (c >= '0' && c <= '9')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ private void parse() {

private String parseToken() {
int start = position();
consumeUntilNoLongerMatches(TOKEN::contains);
consumeWhile(c -> TOKEN.contains((char) c));

// Fail if the token was empty.
if (start == position()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import java.nio.CharBuffer;
import java.util.Objects;
import java.util.function.IntPredicate;
import java.util.function.Predicate;

/**
Expand Down Expand Up @@ -303,7 +304,11 @@ public void skip() {
* the contents of the skipped characters using {@link #sliceFrom(int)}.
*/
public void consumeRemainingCharactersOnLine() {
// NOTE(review): the next line looks like the pre-change implementation shown by the diff;
// the explicit loop below is presumably its inlined replacement -- confirm against the real file.
consumeUntilNoLongerMatches(c -> c != '\n' && c != '\r');
// Skip characters until EOF or a '\n' / '\r' is peeked; that terminator itself is not consumed.
char ch = peek();
while (ch != EOF && ch != '\n' && ch != '\r') {
skip();
ch = peek();
}
}

/**
Expand Down Expand Up @@ -340,23 +345,30 @@ public final CharSequence borrowSliceFrom(int start, int removeRight) {
return CharBuffer.wrap(input, start, position - removeRight);
}

/**
 * Consumes characters while the given {@code predicate} matches each peeked character.
 *
 * @param predicate Predicate that filters characters.
 * @return Returns the number of characters that were consumed.
 * @deprecated Use {@link #consumeWhile(IntPredicate)}, which does not need to box each
 *             character into a {@link Character}.
 */
@Deprecated
public final int consumeUntilNoLongerMatches(Predicate<Character> predicate) {
    // Delegate so the scanning loop lives in one place. consumeWhile hands the predicate a
    // widened char, so narrowing it back to char here is lossless.
    return consumeWhile(c -> predicate.test((char) c));
}

/**
* Reads a lexeme from the expression while the given {@code predicate}
* matches each peeked character.
* Reads a lexeme from the expression while the given {@code predicate} matches each peeked character.
*
* @param predicate Predicate that filters characters.
* @return Returns the number of characters consumed (0 if no characters matched).
*/
public final int consumeUntilNoLongerMatches(Predicate<Character> predicate) {
public final int consumeWhile(IntPredicate predicate) {
int startPosition = position;
while (!eof()) {
char peekedChar = peek();
if (!predicate.test(peekedChar)) {
break;
}
char ch = peek();
while (ch != EOF && predicate.test(ch)) {
skip();
ch = peek();
}

return position - startPosition;
}

Expand Down

0 comments on commit 94bf11b

Please sign in to comment.