Skip to content

Commit

Permalink
Update identifier ABNF and parser
Browse files Browse the repository at this point in the history
Previously, the ABNF did not match the identifier parser.

This commit updates the ABNF to match current parsing behavior, and
updates the identifier parser to avoid out of bounds exceptions.

Also, a set of tests are added for ONLY parsing identifiers rather
than being mixed with namespace tests.
  • Loading branch information
Steven Yuan authored and syall committed Oct 24, 2022
1 parent c079c9b commit d1a8f39
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 6 deletions.
2 changes: 1 addition & 1 deletion docs/source-1.0/spec/core/model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ Shape IDs are formally defined by the following ABNF:
AbsoluteRootShapeId :`Namespace` "#" `Identifier`
Namespace :`Identifier` *("." `Identifier`)
Identifier :`IdentifierStart` *`IdentifierChars`
IdentifierStart :*"_" ALPHA
IdentifierStart :(1*"_" (ALPHA / DIGIT)) / ALPHA
IdentifierChars :ALPHA / DIGIT / "_"
ShapeIdMember :"$" `Identifier`

Expand Down
2 changes: 1 addition & 1 deletion docs/source-2.0/spec/model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ Shape IDs are formally defined by the following ABNF:
AbsoluteRootShapeId :`Namespace` "#" `Identifier`
Namespace :`Identifier` *("." `Identifier`)
Identifier :`IdentifierStart` *`IdentifierChars`
IdentifierStart :*"_" ALPHA
IdentifierStart :(1*"_" (ALPHA / DIGIT)) / ALPHA
IdentifierChars :ALPHA / DIGIT / "_"
ShapeIdMember :"$" `Identifier`

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,21 +131,25 @@ private static int parseIdentifier(CharSequence identifier, int offset) {
return -1;
}

// Parse the required identifier_start production.
// Parse the required IdentifierStart production.
char startingChar = identifier.charAt(offset);
if (startingChar == '_') {
while (identifier.charAt(offset) == '_') {
while (offset < identifier.length() && identifier.charAt(offset) == '_') {
offset++;
}
if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) {
if (offset == identifier.length()) {
return -1;
}
char current = identifier.charAt(offset);
if (!ParserUtils.isAlphabetic(current) && !ParserUtils.isDigit(current)) {
return -1;
}
offset++;
} else if (!ParserUtils.isAlphabetic(startingChar)) {
return -1;
}

// Parse the optional identifier_chars production.
// Parse the optional IdentifierChars production.
while (offset < identifier.length()) {
if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) {
// Return the position of the character that stops the identifier.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,57 @@ public void checksIfValidNamespaceName() {
assertTrue(ShapeId.isValidNamespace("f.b1.c_d_1234.e"));
}

@Test
public void checksIfValidNamespaceNameIdentifierOnly() {
// Empty String
assertFalse(ShapeId.isValidNamespace(""));

// Underscore Only
assertFalse(ShapeId.isValidNamespace("_"));
assertFalse(ShapeId.isValidNamespace("__"));

// Starts with DIGIT
assertFalse(ShapeId.isValidNamespace("1"));
assertFalse(ShapeId.isValidNamespace("1_"));
assertFalse(ShapeId.isValidNamespace("1a"));
assertFalse(ShapeId.isValidNamespace("12"));

// IdentifierStart: 1*"_" (ALPHA / DIGIT)
assertTrue(ShapeId.isValidNamespace("_a"));
assertTrue(ShapeId.isValidNamespace("_1"));
assertTrue(ShapeId.isValidNamespace("__a"));
assertTrue(ShapeId.isValidNamespace("__1"));
assertFalse(ShapeId.isValidNamespace("__#"));

// IdentifierStart: ALPHA
assertTrue(ShapeId.isValidNamespace("a"));
assertFalse(ShapeId.isValidNamespace("#"));

// Identifier: 1*"_" (ALPHA / DIGIT) *`IdentifierChars`
assertTrue(ShapeId.isValidNamespace("_ab"));
assertTrue(ShapeId.isValidNamespace("_a1"));
assertTrue(ShapeId.isValidNamespace("_a_"));
assertFalse(ShapeId.isValidNamespace("_a#"));
assertTrue(ShapeId.isValidNamespace("_1a"));
assertTrue(ShapeId.isValidNamespace("_12"));
assertTrue(ShapeId.isValidNamespace("_1_"));
assertFalse(ShapeId.isValidNamespace("_1#"));
assertTrue(ShapeId.isValidNamespace("__ab"));
assertTrue(ShapeId.isValidNamespace("__a1"));
assertTrue(ShapeId.isValidNamespace("__a_"));
assertFalse(ShapeId.isValidNamespace("__a#"));
assertTrue(ShapeId.isValidNamespace("__1a"));
assertTrue(ShapeId.isValidNamespace("__12"));
assertTrue(ShapeId.isValidNamespace("__1_"));
assertFalse(ShapeId.isValidNamespace("__1#"));

// Identifier: ALPHA *`IdentifierChars`
assertTrue(ShapeId.isValidNamespace("ab"));
assertTrue(ShapeId.isValidNamespace("a1"));
assertTrue(ShapeId.isValidNamespace("a_"));
assertFalse(ShapeId.isValidNamespace("a#"));
}

@ParameterizedTest
@MethodSource("shapeIdData")
public void ShapeIdValidationsTest(String shapeId, boolean isInvalid) {
Expand Down

0 comments on commit d1a8f39

Please sign in to comment.