Skip to content

Commit

Permalink
Disallow problematic identifiers
Browse files Browse the repository at this point in the history
These newly disallowed identifiers were never actually supported in
practice and are problematic on multiple levels:

1. An identifier like `_` or `__` are bad names that don't help users
   understand an API.
2. Identifiers like `_` are problematic for languages like Rust that use
   this character for pattern matching.
  • Loading branch information
mtdowling committed Jul 16, 2020
1 parent 3dbe356 commit 3ca1ce2
Show file tree
Hide file tree
Showing 10 changed files with 103 additions and 38 deletions.
6 changes: 6 additions & 0 deletions docs/source/1.0/spec/core/idl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,12 @@ The Smithy IDL is defined by the following ABNF:
trait_structure_kvp :`node_object_key` `ws` ":" `ws` `node_value`
apply_statement :"apply" `ws` `shape_id` `ws` `trait` `br`
.. rubric:: Shape ID

.. seealso::

Refer to :ref:`shape-id` for the ABNF grammar of shape IDs.


.. _comments:

Expand Down
4 changes: 3 additions & 1 deletion docs/source/1.0/spec/core/model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,9 @@ Shape IDs are formally defined by the following ABNF:
root_shape_id :`absolute_root_shape_id` / `identifier`
absolute_root_shape_id :`namespace` "#" `identifier`
namespace :`identifier` *("." `identifier`)
identifier :(ALPHA / "_") *(ALPHA / DIGIT / "_")
identifier :identifier_start *identifier_chars
identifier_start :*"_" ALPHA
identifier_chars :ALPHA / DIGIT / "_"
shape_id_member :"$" `identifier`

.. rubric:: Best practices for defining shape names
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,22 +147,37 @@ public static void consumeNamespace(SimpleParser parser) {
* Expects and skips over a Smithy identifier production.
*
* <pre>
* identifier = (ALPHA / "_") *(ALPHA / DIGIT / "_")
* identifier = identifier_start *identifier_chars
* identifier_start = *"_" ALPHA
* identifier_chars = ALPHA / DIGIT / "_"
* </pre>
*
* @param parser Parser to consume tokens from.
*/
public static void consumeIdentifier(SimpleParser parser) {
// (ALPHA / "_")
if (!isIdentifierStart(parser.peek())) {
throw parser.syntax("Expected a valid identifier character, but found '"
+ parser.peekSingleCharForMessage() + '\'');
// Parse identifier_start
char c = parser.peek();
if (c == '_') {
parser.consumeUntilNoLongerMatches(next -> next == '_');
if (!ParserUtils.isValidIdentifierCharacter(parser.peek())) {
throw invalidIdentifier(parser);
}
} else if (!isAlphabetic(c)) {
throw invalidIdentifier(parser);
}

// *(ALPHA / DIGIT / "_")
// Skip the first character since it's known to be valid.
parser.skip();

// Parse identifier_chars
parser.consumeUntilNoLongerMatches(ParserUtils::isValidIdentifierCharacter);
}

private static RuntimeException invalidIdentifier(SimpleParser parser) {
throw parser.syntax("Expected a valid identifier character, but found '"
+ parser.peekSingleCharForMessage() + '\'');
}

/**
* Returns true if the given character is allowed in an identifier.
*
Expand All @@ -180,7 +195,7 @@ public static boolean isValidIdentifierCharacter(char c) {
* @return Returns true if the character can start an identifier.
*/
public static boolean isIdentifierStart(char c) {
return c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
return c == '_' || isAlphabetic(c);
}

/**
Expand All @@ -192,4 +207,15 @@ public static boolean isIdentifierStart(char c) {
public static boolean isDigit(char c) {
return c >= '0' && c <= '9';
}

/**
* Returns true if the given character is an alphabetic character
* A-Z, a-z. This is a stricter version of {@link Character#isAlphabetic}.
*
* @param c Character to check.
* @return Returns true if the character is an alphabetic character.
*/
public static boolean isAlphabetic(char c) {
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,29 +85,29 @@ public static ShapeId from(String id) {
public static boolean isValidNamespace(CharSequence namespace) {
if (namespace == null) {
return false;
} else if (namespace.equals(Prelude.NAMESPACE)) {
// Shortcut for prelude namespaces.
return true;
}

// Shortcut for prelude namespaces.
if (namespace.equals(Prelude.NAMESPACE)) {
return true;
int length = namespace.length();
if (length == 0) {
return false;
}

boolean start = true;
for (int position = 0; position < namespace.length(); position++) {
char c = namespace.charAt(position);
if (start) {
start = false;
if (!ParserUtils.isIdentifierStart(c)) {
return false;
}
} else if (c == '.') {
start = true;
} else if (!ParserUtils.isValidIdentifierCharacter(c)) {
int position = 0;
while (true) {
position = parseIdentifier(namespace, position);
if (position == -1) { // Bad: did not parse a valid identifier.
return false;
}
} else if (position == length) { // Good: parsed and reached the end.
return true;
} else if (namespace.charAt(position) != '.') { // Bad: invalid character.
return false;
} else if (++position >= length) { // Bad: trailing '.'
return false;
} // continue parsing after '.', expecting an identifier.
}

return !start;
}

/**
Expand All @@ -117,21 +117,40 @@ public static boolean isValidNamespace(CharSequence namespace) {
* @return Returns true if this is a valid identifier.
*/
public static boolean isValidIdentifier(CharSequence identifier) {
if (identifier == null || identifier.length() == 0) {
return false;
return parseIdentifier(identifier, 0) == identifier.length();
}

private static int parseIdentifier(CharSequence identifier, int offset) {
if (identifier == null || identifier.length() <= offset) {
return -1;
}

if (!ParserUtils.isIdentifierStart(identifier.charAt(0))) {
return false;
// Parse the required identifier_start production.
char startingChar = identifier.charAt(offset);
if (startingChar == '_') {
while (identifier.charAt(offset) == '_') {
offset++;
}
if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) {
return -1;
}
offset++;
} else if (!ParserUtils.isAlphabetic(startingChar)) {
return -1;
}

for (int i = 1; i < identifier.length(); i++) {
if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(i))) {
return false;
// Parse the optional identifier_chars production.
while (offset < identifier.length()) {
if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) {
// Return the position of the character that stops the identifier.
// This is either an invalid case (e.g., isValidIdentifier), or
// just the marker needed for isValidNamespace to find '.'.
return offset;
}
offset++;
}

return true;
return offset;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -941,7 +941,8 @@ public void parsesValidVariableAccess() {
"${\nfoo\n}\n",
"${a}",
"${a_b_c}",
"${_}");
"${_a}",
"${__a}");

for (String expr : exprs) {
Selector.parse(expr);
Expand All @@ -956,7 +957,9 @@ public void detectsInvalidVariableAccess() {
"${}",
"$}",
"${a",
"${*}");
"${*}",
"${_}",
"${__}");

for (String expr : exprs) {
Assertions.assertThrows(SelectorSyntaxException.class, () -> Selector.parse(expr));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ public static Collection<Object[]> shapeIdData() {
{ "Mixed.case#name", false},
{ "_foo.baz#name", false},
{ "__foo.baz#name", false},
{ "a._.b#name", false},

// invalid namespaces
{ "#name", true},
Expand All @@ -179,6 +178,8 @@ public static Collection<Object[]> shapeIdData() {
{ ".name.space#name", true},
{ "name-space#name", true},
{ "1namespace.foo#name", true},
{ "a._.b#name", true},
{ "a.____.b#name", true},

// valid shape names
{ "ns.foo#shapename", false},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Parse error at line 3, column 19 near `#\n`: Expected a valid identifier character, but found '#'

metadata abc = __$#
metadata abc = aa$#

namespace foo.baz
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Parse error at line 4, column 11 near `%I`: Expected a valid identifier character, but found '%'
namespace com.foo

structure %Invalid {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Parse error at line 4, column 12 near ` `: Expected a valid identifier character, but found ' '
namespace com.foo

structure _ {}
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ $version: "1.0"

namespace smithy.example

use _$#Bar
use a$#Bar

0 comments on commit 3ca1ce2

Please sign in to comment.