From 843fe69cf88f7cac20465703df0cb3fbae1c3992 Mon Sep 17 00:00:00 2001 From: Steven Yuan Date: Mon, 24 Oct 2022 10:50:05 -0700 Subject: [PATCH] Update identifier ABNF and parser Previously, the ABNF did not match the identifier parser. This commit updates the ABNF to match current parsing behavior, and updates the identifier parser to avoid out of bounds exceptions. Also, a set of tests are added for ONLY parsing identifiers rather than being mixed with namespace tests. --- docs/source-1.0/spec/core/model.rst | 2 +- docs/source-2.0/spec/model.rst | 2 +- .../amazon/smithy/model/shapes/ShapeId.java | 12 +++-- .../smithy/model/shapes/ShapeIdTest.java | 51 +++++++++++++++++++ 4 files changed, 61 insertions(+), 6 deletions(-) diff --git a/docs/source-1.0/spec/core/model.rst b/docs/source-1.0/spec/core/model.rst index d184f61da76..1d7f33fa147 100644 --- a/docs/source-1.0/spec/core/model.rst +++ b/docs/source-1.0/spec/core/model.rst @@ -244,7 +244,7 @@ Shape IDs are formally defined by the following ABNF: AbsoluteRootShapeId :`Namespace` "#" `Identifier` Namespace :`Identifier` *("." `Identifier`) Identifier :`IdentifierStart` *`IdentifierChars` - IdentifierStart :*"_" ALPHA + IdentifierStart :(1*"_" (ALPHA / DIGIT)) / ALPHA IdentifierChars :ALPHA / DIGIT / "_" ShapeIdMember :"$" `Identifier` diff --git a/docs/source-2.0/spec/model.rst b/docs/source-2.0/spec/model.rst index af962e4a4f8..e0e07b803f5 100644 --- a/docs/source-2.0/spec/model.rst +++ b/docs/source-2.0/spec/model.rst @@ -550,7 +550,7 @@ Shape IDs are formally defined by the following ABNF: AbsoluteRootShapeId :`Namespace` "#" `Identifier` Namespace :`Identifier` *("." `Identifier`) Identifier :`IdentifierStart` *`IdentifierChars` - IdentifierStart :*"_" ALPHA + IdentifierStart :(1*"_" (ALPHA / DIGIT)) / ALPHA IdentifierChars :ALPHA / DIGIT / "_" ShapeIdMember :"$" `Identifier` diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/shapes/ShapeId.java b/smithy-model/src/main/java/software/amazon/smithy/model/shapes/ShapeId.java index de421459cd7..dd2ba06ffde 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/shapes/ShapeId.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/shapes/ShapeId.java @@ -131,13 +131,17 @@ private static int parseIdentifier(CharSequence identifier, int offset) { return -1; } - // Parse the required identifier_start production. + // Parse the required IdentifierStart production. char startingChar = identifier.charAt(offset); if (startingChar == '_') { - while (identifier.charAt(offset) == '_') { + while (offset < identifier.length() && identifier.charAt(offset) == '_') { offset++; } - if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) { + if (offset == identifier.length()) { + return -1; + } + char current = identifier.charAt(offset); + if (!ParserUtils.isAlphabetic(current) && !ParserUtils.isDigit(current)) { return -1; } offset++; @@ -145,7 +149,7 @@ private static int parseIdentifier(CharSequence identifier, int offset) { return -1; } - // Parse the optional identifier_chars production. + // Parse the optional IdentifierChars production. while (offset < identifier.length()) { if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) { // Return the position of the character that stops the identifier. diff --git a/smithy-model/src/test/java/software/amazon/smithy/model/shapes/ShapeIdTest.java b/smithy-model/src/test/java/software/amazon/smithy/model/shapes/ShapeIdTest.java index 15a4d68d9f4..fa81cadebc4 100644 --- a/smithy-model/src/test/java/software/amazon/smithy/model/shapes/ShapeIdTest.java +++ b/smithy-model/src/test/java/software/amazon/smithy/model/shapes/ShapeIdTest.java @@ -136,6 +136,57 @@ public void checksIfValidNamespaceName() { assertTrue(ShapeId.isValidNamespace("f.b1.c_d_1234.e")); } + @Test + public void checksIfValidNamespaceNameIdentifierOnly() { + // Empty String + assertFalse(ShapeId.isValidNamespace("")); + + // Underscore Only + assertFalse(ShapeId.isValidNamespace("_")); + assertFalse(ShapeId.isValidNamespace("__")); + + // Starts with DIGIT + assertFalse(ShapeId.isValidNamespace("1")); + assertFalse(ShapeId.isValidNamespace("1_")); + assertFalse(ShapeId.isValidNamespace("1a")); + assertFalse(ShapeId.isValidNamespace("12")); + + // IdentifierStart: 1*"_" (ALPHA / DIGIT) + assertTrue(ShapeId.isValidNamespace("_a")); + assertTrue(ShapeId.isValidNamespace("_1")); + assertTrue(ShapeId.isValidNamespace("__a")); + assertTrue(ShapeId.isValidNamespace("__1")); + assertFalse(ShapeId.isValidNamespace("__#")); + + // IdentifierStart: ALPHA + assertTrue(ShapeId.isValidNamespace("a")); + assertFalse(ShapeId.isValidNamespace("#")); + + // Identifier: 1*"_" (ALPHA / DIGIT) *`IdentifierChars` + assertTrue(ShapeId.isValidNamespace("_ab")); + assertTrue(ShapeId.isValidNamespace("_a1")); + assertTrue(ShapeId.isValidNamespace("_a_")); + assertFalse(ShapeId.isValidNamespace("_a#")); + assertTrue(ShapeId.isValidNamespace("_1a")); + assertTrue(ShapeId.isValidNamespace("_12")); + assertTrue(ShapeId.isValidNamespace("_1_")); + assertFalse(ShapeId.isValidNamespace("_1#")); + assertTrue(ShapeId.isValidNamespace("__ab")); + assertTrue(ShapeId.isValidNamespace("__a1")); + assertTrue(ShapeId.isValidNamespace("__a_")); + assertFalse(ShapeId.isValidNamespace("__a#")); + assertTrue(ShapeId.isValidNamespace("__1a")); + assertTrue(ShapeId.isValidNamespace("__12")); + assertTrue(ShapeId.isValidNamespace("__1_")); + assertFalse(ShapeId.isValidNamespace("__1#")); + + // Identifier: ALPHA *`IdentifierChars` + assertTrue(ShapeId.isValidNamespace("ab")); + assertTrue(ShapeId.isValidNamespace("a1")); + assertTrue(ShapeId.isValidNamespace("a_")); + assertFalse(ShapeId.isValidNamespace("a#")); + } + @ParameterizedTest @MethodSource("shapeIdData") public void ShapeIdValidationsTest(String shapeId, boolean isInvalid) {