From a779b4769aeee78f4b8e2d3ac954401256fe72be Mon Sep 17 00:00:00 2001 From: Michael Dowling Date: Wed, 15 Jul 2020 17:13:38 -0700 Subject: [PATCH] Disallow problematic identifiers These newly disallowed identifiers were never actually supported in practice and are problematic on multiple levels: 1. An identifier like `_` or `__` are bad names that don't help users understand an API. 2. Identifiers like `_` are problematic for languages like Rust that use this character for pattern matching. --- docs/source/1.0/spec/core/idl.rst | 6 ++ docs/source/1.0/spec/core/model.rst | 4 +- .../smithy/model/loader/ParserUtils.java | 40 +++++++++-- .../amazon/smithy/model/shapes/ShapeId.java | 69 ++++++++++++------- .../smithy/model/selector/SelectorTest.java | 7 +- .../smithy/model/shapes/ShapeIdTest.java | 3 +- .../invalid/invalid-unquoted-shape-id.smithy | 2 +- .../loader/invalid/shape-name-syntax2.smithy | 4 ++ .../loader/invalid/shape-name-syntax3.smithy | 4 ++ .../invalid/use/use-invalid-namespace.smithy | 2 +- 10 files changed, 103 insertions(+), 38 deletions(-) create mode 100644 smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/shape-name-syntax2.smithy create mode 100644 smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/shape-name-syntax3.smithy diff --git a/docs/source/1.0/spec/core/idl.rst b/docs/source/1.0/spec/core/idl.rst index 3b8cbf1fee1..615d7f3c08d 100644 --- a/docs/source/1.0/spec/core/idl.rst +++ b/docs/source/1.0/spec/core/idl.rst @@ -216,6 +216,12 @@ The Smithy IDL is defined by the following ABNF: trait_structure_kvp :`node_object_key` `ws` ":" `ws` `node_value` apply_statement :"apply" `ws` `shape_id` `ws` `trait` `br` +.. rubric:: Shape ID + +.. seealso:: + + Refer to :ref:`shape-id` for the ABNF grammar of shape IDs. + .. _comments: diff --git a/docs/source/1.0/spec/core/model.rst b/docs/source/1.0/spec/core/model.rst index 0221c7b4b5d..466585297a8 100644 --- a/docs/source/1.0/spec/core/model.rst +++ b/docs/source/1.0/spec/core/model.rst @@ -445,7 +445,9 @@ Shape IDs are formally defined by the following ABNF: root_shape_id :`absolute_root_shape_id` / `identifier` absolute_root_shape_id :`namespace` "#" `identifier` namespace :`identifier` *("." `identifier`) - identifier :(ALPHA / "_") *(ALPHA / DIGIT / "_") + identifier :identifier_start *identifier_chars + identifier_start :*"_" ALPHA + identifier_chars :ALPHA / DIGIT / "_" shape_id_member :"$" `identifier` .. rubric:: Best practices for defining shape names diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/loader/ParserUtils.java b/smithy-model/src/main/java/software/amazon/smithy/model/loader/ParserUtils.java index 016d5d1d16d..fd15afb9cc1 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/loader/ParserUtils.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/loader/ParserUtils.java @@ -147,22 +147,37 @@ public static void consumeNamespace(SimpleParser parser) { * Expects and skips over a Smithy identifier production. * *
-     *     identifier = (ALPHA / "_") *(ALPHA / DIGIT / "_")
+     *     identifier       = identifier_start *identifier_chars
+     *     identifier_start = *"_" ALPHA
+     *     identifier_chars = ALPHA / DIGIT / "_"
      * 
* * @param parser Parser to consume tokens from. */ public static void consumeIdentifier(SimpleParser parser) { - // (ALPHA / "_") - if (!isIdentifierStart(parser.peek())) { - throw parser.syntax("Expected a valid identifier character, but found '" - + parser.peekSingleCharForMessage() + '\''); + // Parse identifier_start + char c = parser.peek(); + if (c == '_') { + parser.consumeUntilNoLongerMatches(next -> next == '_'); + if (!ParserUtils.isValidIdentifierCharacter(parser.peek())) { + throw invalidIdentifier(parser); + } + } else if (!isAlphabetic(c)) { + throw invalidIdentifier(parser); } - // *(ALPHA / DIGIT / "_") + // Skip the first character since it's known to be valid. + parser.skip(); + + // Parse identifier_chars parser.consumeUntilNoLongerMatches(ParserUtils::isValidIdentifierCharacter); } + private static RuntimeException invalidIdentifier(SimpleParser parser) { + throw parser.syntax("Expected a valid identifier character, but found '" + + parser.peekSingleCharForMessage() + '\''); + } + /** * Returns true if the given character is allowed in an identifier. * @@ -180,7 +195,7 @@ public static boolean isValidIdentifierCharacter(char c) { * @return Returns true if the character can start an identifier. */ public static boolean isIdentifierStart(char c) { - return c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); + return c == '_' || isAlphabetic(c); } /** @@ -192,4 +207,15 @@ public static boolean isIdentifierStart(char c) { public static boolean isDigit(char c) { return c >= '0' && c <= '9'; } + + /** + * Returns true if the given character is an alphabetic character + * A-Z, a-z. This is a stricter version of {@link Character#isAlphabetic}. + * + * @param c Character to check. + * @return Returns true if the character is an alphabetic character. + */ + public static boolean isAlphabetic(char c) { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); + } } diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/shapes/ShapeId.java b/smithy-model/src/main/java/software/amazon/smithy/model/shapes/ShapeId.java index 745a0f0d93b..517b9438354 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/shapes/ShapeId.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/shapes/ShapeId.java @@ -85,29 +85,29 @@ public static ShapeId from(String id) { public static boolean isValidNamespace(CharSequence namespace) { if (namespace == null) { return false; + } else if (namespace.equals(Prelude.NAMESPACE)) { + // Shortcut for prelude namespaces. + return true; } - // Shortcut for prelude namespaces. - if (namespace.equals(Prelude.NAMESPACE)) { - return true; + int length = namespace.length(); + if (length == 0) { + return false; } - boolean start = true; - for (int position = 0; position < namespace.length(); position++) { - char c = namespace.charAt(position); - if (start) { - start = false; - if (!ParserUtils.isIdentifierStart(c)) { - return false; - } - } else if (c == '.') { - start = true; - } else if (!ParserUtils.isValidIdentifierCharacter(c)) { + int position = 0; + while (true) { + position = parseIdentifier(namespace, position); + if (position == -1) { // Bad: did not parse a valid identifier. return false; - } + } else if (position == length) { // Good: parsed and reached the end. + return true; + } else if (namespace.charAt(position) != '.') { // Bad: invalid character. + return false; + } else if (++position >= length) { // Bad: trailing '.' + return false; + } // continue parsing after '.', expecting an identifier. } - - return !start; } /** @@ -117,21 +117,40 @@ public static boolean isValidNamespace(CharSequence namespace) { * @return Returns true if this is a valid identifier. */ public static boolean isValidIdentifier(CharSequence identifier) { - if (identifier == null || identifier.length() == 0) { - return false; + return parseIdentifier(identifier, 0) == identifier.length(); + } + + private static int parseIdentifier(CharSequence identifier, int offset) { + if (identifier == null || identifier.length() <= offset) { + return -1; } - if (!ParserUtils.isIdentifierStart(identifier.charAt(0))) { - return false; + // Parse the required identifier_start production. + char startingChar = identifier.charAt(offset); + if (startingChar == '_') { + while (identifier.charAt(offset) == '_') { + offset++; + } + if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) { + return -1; + } + offset++; + } else if (!ParserUtils.isAlphabetic(startingChar)) { + return -1; } - for (int i = 1; i < identifier.length(); i++) { - if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(i))) { - return false; + // Parse the optional identifier_chars production. + while (offset < identifier.length()) { + if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) { + // Return the position of the character that stops the identifier. + // This is either an invalid case (e.g., isValidIdentifier), or + // just the marker needed for isValidNamespace to find '.'. + return offset; } + offset++; } - return true; + return offset; } /** diff --git a/smithy-model/src/test/java/software/amazon/smithy/model/selector/SelectorTest.java b/smithy-model/src/test/java/software/amazon/smithy/model/selector/SelectorTest.java index 2e52928c254..9a418a2af27 100644 --- a/smithy-model/src/test/java/software/amazon/smithy/model/selector/SelectorTest.java +++ b/smithy-model/src/test/java/software/amazon/smithy/model/selector/SelectorTest.java @@ -941,7 +941,8 @@ public void parsesValidVariableAccess() { "${\nfoo\n}\n", "${a}", "${a_b_c}", - "${_}"); + "${_a}", + "${__a}"); for (String expr : exprs) { Selector.parse(expr); @@ -956,7 +957,9 @@ public void detectsInvalidVariableAccess() { "${}", "$}", "${a", - "${*}"); + "${*}", + "${_}", + "${__}"); for (String expr : exprs) { Assertions.assertThrows(SelectorSyntaxException.class, () -> Selector.parse(expr)); diff --git a/smithy-model/src/test/java/software/amazon/smithy/model/shapes/ShapeIdTest.java b/smithy-model/src/test/java/software/amazon/smithy/model/shapes/ShapeIdTest.java index b9f582b9f56..47cb9fc9e6d 100644 --- a/smithy-model/src/test/java/software/amazon/smithy/model/shapes/ShapeIdTest.java +++ b/smithy-model/src/test/java/software/amazon/smithy/model/shapes/ShapeIdTest.java @@ -170,7 +170,6 @@ public static Collection shapeIdData() { { "Mixed.case#name", false}, { "_foo.baz#name", false}, { "__foo.baz#name", false}, - { "a._.b#name", false}, // invalid namespaces { "#name", true}, @@ -179,6 +178,8 @@ public static Collection shapeIdData() { { ".name.space#name", true}, { "name-space#name", true}, { "1namespace.foo#name", true}, + { "a._.b#name", true}, + { "a.____.b#name", true}, // valid shape names { "ns.foo#shapename", false}, diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/invalid-unquoted-shape-id.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/invalid-unquoted-shape-id.smithy index 9a8f1c8713b..439d6d14f90 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/invalid-unquoted-shape-id.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/invalid-unquoted-shape-id.smithy @@ -1,5 +1,5 @@ // Parse error at line 3, column 19 near `#\n`: Expected a valid identifier character, but found '#' -metadata abc = __$# +metadata abc = aa$# namespace foo.baz diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/shape-name-syntax2.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/shape-name-syntax2.smithy new file mode 100644 index 00000000000..2f91da0e3f2 --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/shape-name-syntax2.smithy @@ -0,0 +1,4 @@ +// Parse error at line 4, column 11 near `%I`: Expected a valid identifier character, but found '%' +namespace com.foo + +structure %Invalid {} diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/shape-name-syntax3.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/shape-name-syntax3.smithy new file mode 100644 index 00000000000..a86ff4b1cdb --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/shape-name-syntax3.smithy @@ -0,0 +1,4 @@ +// Parse error at line 4, column 12 near ` `: Expected a valid identifier character, but found ' ' +namespace com.foo + +structure _ {} diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/use/use-invalid-namespace.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/use/use-invalid-namespace.smithy index cc651fad11e..6d0cfc3f26e 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/use/use-invalid-namespace.smithy +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/invalid/use/use-invalid-namespace.smithy @@ -3,4 +3,4 @@ $version: "1.0" namespace smithy.example -use _$#Bar +use a$#Bar