Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update identifier ABNF and parser #1464

Merged
merged 1 commit into from
Oct 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source-1.0/spec/core/model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ Shape IDs are formally defined by the following ABNF:
AbsoluteRootShapeId :`Namespace` "#" `Identifier`
Namespace :`Identifier` *("." `Identifier`)
Identifier :`IdentifierStart` *`IdentifierChars`
IdentifierStart :*"_" ALPHA
IdentifierStart :(1*"_" (ALPHA / DIGIT)) / ALPHA
IdentifierChars :ALPHA / DIGIT / "_"
ShapeIdMember :"$" `Identifier`

Expand Down
2 changes: 1 addition & 1 deletion docs/source-2.0/spec/model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ Shape IDs are formally defined by the following ABNF:
AbsoluteRootShapeId :`Namespace` "#" `Identifier`
Namespace :`Identifier` *("." `Identifier`)
Identifier :`IdentifierStart` *`IdentifierChars`
IdentifierStart :*"_" ALPHA
IdentifierStart :(1*"_" (ALPHA / DIGIT)) / ALPHA
IdentifierChars :ALPHA / DIGIT / "_"
ShapeIdMember :"$" `Identifier`

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,21 +131,25 @@ private static int parseIdentifier(CharSequence identifier, int offset) {
return -1;
}

// Parse the required identifier_start production.
// Parse the required IdentifierStart production.
char startingChar = identifier.charAt(offset);
if (startingChar == '_') {
while (identifier.charAt(offset) == '_') {
while (offset < identifier.length() && identifier.charAt(offset) == '_') {
offset++;
}
if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) {
if (offset == identifier.length()) {
return -1;
}
char current = identifier.charAt(offset);
if (!ParserUtils.isAlphabetic(current) && !ParserUtils.isDigit(current)) {
return -1;
}
offset++;
} else if (!ParserUtils.isAlphabetic(startingChar)) {
return -1;
}

// Parse the optional identifier_chars production.
// Parse the optional IdentifierChars production.
while (offset < identifier.length()) {
if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) {
// Return the position of the character that stops the identifier.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,57 @@ public void checksIfValidNamespaceName() {
assertTrue(ShapeId.isValidNamespace("f.b1.c_d_1234.e"));
}

@Test
public void checksIfValidNamespaceNameIdentifierOnly() {
// Empty String
assertFalse(ShapeId.isValidNamespace(""));

// Underscore Only
assertFalse(ShapeId.isValidNamespace("_"));
assertFalse(ShapeId.isValidNamespace("__"));

// Starts with DIGIT
assertFalse(ShapeId.isValidNamespace("1"));
assertFalse(ShapeId.isValidNamespace("1_"));
assertFalse(ShapeId.isValidNamespace("1a"));
assertFalse(ShapeId.isValidNamespace("12"));

// IdentifierStart: 1*"_" (ALPHA / DIGIT)
assertTrue(ShapeId.isValidNamespace("_a"));
assertTrue(ShapeId.isValidNamespace("_1"));
assertTrue(ShapeId.isValidNamespace("__a"));
assertTrue(ShapeId.isValidNamespace("__1"));
assertFalse(ShapeId.isValidNamespace("__#"));

// IdentifierStart: ALPHA
assertTrue(ShapeId.isValidNamespace("a"));
assertFalse(ShapeId.isValidNamespace("#"));

// Identifier: 1*"_" (ALPHA / DIGIT) *`IdentifierChars`
assertTrue(ShapeId.isValidNamespace("_ab"));
assertTrue(ShapeId.isValidNamespace("_a1"));
assertTrue(ShapeId.isValidNamespace("_a_"));
assertFalse(ShapeId.isValidNamespace("_a#"));
assertTrue(ShapeId.isValidNamespace("_1a"));
assertTrue(ShapeId.isValidNamespace("_12"));
assertTrue(ShapeId.isValidNamespace("_1_"));
assertFalse(ShapeId.isValidNamespace("_1#"));
assertTrue(ShapeId.isValidNamespace("__ab"));
assertTrue(ShapeId.isValidNamespace("__a1"));
assertTrue(ShapeId.isValidNamespace("__a_"));
assertFalse(ShapeId.isValidNamespace("__a#"));
assertTrue(ShapeId.isValidNamespace("__1a"));
assertTrue(ShapeId.isValidNamespace("__12"));
assertTrue(ShapeId.isValidNamespace("__1_"));
assertFalse(ShapeId.isValidNamespace("__1#"));

// Identifier: ALPHA *`IdentifierChars`
assertTrue(ShapeId.isValidNamespace("ab"));
assertTrue(ShapeId.isValidNamespace("a1"));
assertTrue(ShapeId.isValidNamespace("a_"));
assertFalse(ShapeId.isValidNamespace("a#"));
}

@ParameterizedTest
@MethodSource("shapeIdData")
public void ShapeIdValidationsTest(String shapeId, boolean isInvalid) {
Expand Down