Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disallow problematic identifiers #499

Merged
merged 1 commit into from
Jul 16, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Disallow problematic identifiers
These newly disallowed identifiers were never actually supported in
practice and are problematic on multiple levels:

1. An identifier like `_` or `__` are bad names that don't help users
   understand an API.
2. Identifiers like `_` are problematic for languages like Rust that use
   this character for pattern matching.
mtdowling committed Jul 16, 2020

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit 3ca1ce2190b928e76939345e89b5880bca40b970
6 changes: 6 additions & 0 deletions docs/source/1.0/spec/core/idl.rst
Original file line number Diff line number Diff line change
@@ -216,6 +216,12 @@ The Smithy IDL is defined by the following ABNF:
trait_structure_kvp :`node_object_key` `ws` ":" `ws` `node_value`
apply_statement :"apply" `ws` `shape_id` `ws` `trait` `br`

.. rubric:: Shape ID

.. seealso::

Refer to :ref:`shape-id` for the ABNF grammar of shape IDs.


.. _comments:

4 changes: 3 additions & 1 deletion docs/source/1.0/spec/core/model.rst
Original file line number Diff line number Diff line change
@@ -445,7 +445,9 @@ Shape IDs are formally defined by the following ABNF:
root_shape_id :`absolute_root_shape_id` / `identifier`
absolute_root_shape_id :`namespace` "#" `identifier`
namespace :`identifier` *("." `identifier`)
identifier :(ALPHA / "_") *(ALPHA / DIGIT / "_")
identifier :identifier_start *identifier_chars
identifier_start :*"_" ALPHA
identifier_chars :ALPHA / DIGIT / "_"
shape_id_member :"$" `identifier`

.. rubric:: Best practices for defining shape names
Original file line number Diff line number Diff line change
@@ -147,22 +147,37 @@ public static void consumeNamespace(SimpleParser parser) {
* Expects and skips over a Smithy identifier production.
*
* <pre>
* identifier = (ALPHA / "_") *(ALPHA / DIGIT / "_")
* identifier = identifier_start *identifier_chars
* identifier_start = *"_" ALPHA
* identifier_chars = ALPHA / DIGIT / "_"
* </pre>
*
* @param parser Parser to consume tokens from.
*/
public static void consumeIdentifier(SimpleParser parser) {
// (ALPHA / "_")
if (!isIdentifierStart(parser.peek())) {
throw parser.syntax("Expected a valid identifier character, but found '"
+ parser.peekSingleCharForMessage() + '\'');
// Parse identifier_start
char c = parser.peek();
if (c == '_') {
parser.consumeUntilNoLongerMatches(next -> next == '_');
if (!ParserUtils.isValidIdentifierCharacter(parser.peek())) {
throw invalidIdentifier(parser);
}
} else if (!isAlphabetic(c)) {
throw invalidIdentifier(parser);
}

// *(ALPHA / DIGIT / "_")
// Skip the first character since it's known to be valid.
parser.skip();

// Parse identifier_chars
parser.consumeUntilNoLongerMatches(ParserUtils::isValidIdentifierCharacter);
}

private static RuntimeException invalidIdentifier(SimpleParser parser) {
throw parser.syntax("Expected a valid identifier character, but found '"
+ parser.peekSingleCharForMessage() + '\'');
}

/**
* Returns true if the given character is allowed in an identifier.
*
@@ -180,7 +195,7 @@ public static boolean isValidIdentifierCharacter(char c) {
* @return Returns true if the character can start an identifier.
*/
public static boolean isIdentifierStart(char c) {
return c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
return c == '_' || isAlphabetic(c);
}

/**
@@ -192,4 +207,15 @@ public static boolean isIdentifierStart(char c) {
public static boolean isDigit(char c) {
return c >= '0' && c <= '9';
}

/**
* Returns true if the given character is an alphabetic character
* A-Z, a-z. This is a stricter version of {@link Character#isAlphabetic}.
*
* @param c Character to check.
* @return Returns true if the character is an alphabetic character.
*/
public static boolean isAlphabetic(char c) {
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
}
Original file line number Diff line number Diff line change
@@ -85,29 +85,29 @@ public static ShapeId from(String id) {
public static boolean isValidNamespace(CharSequence namespace) {
if (namespace == null) {
return false;
} else if (namespace.equals(Prelude.NAMESPACE)) {
// Shortcut for prelude namespaces.
return true;
}

// Shortcut for prelude namespaces.
if (namespace.equals(Prelude.NAMESPACE)) {
return true;
int length = namespace.length();
if (length == 0) {
return false;
}

boolean start = true;
for (int position = 0; position < namespace.length(); position++) {
char c = namespace.charAt(position);
if (start) {
start = false;
if (!ParserUtils.isIdentifierStart(c)) {
return false;
}
} else if (c == '.') {
start = true;
} else if (!ParserUtils.isValidIdentifierCharacter(c)) {
int position = 0;
while (true) {
position = parseIdentifier(namespace, position);
if (position == -1) { // Bad: did not parse a valid identifier.
return false;
}
} else if (position == length) { // Good: parsed and reached the end.
return true;
} else if (namespace.charAt(position) != '.') { // Bad: invalid character.
return false;
} else if (++position >= length) { // Bad: trailing '.'
return false;
} // continue parsing after '.', expecting an identifier.
}

return !start;
}

/**
@@ -117,21 +117,40 @@ public static boolean isValidNamespace(CharSequence namespace) {
* @return Returns true if this is a valid identifier.
*/
public static boolean isValidIdentifier(CharSequence identifier) {
if (identifier == null || identifier.length() == 0) {
return false;
return parseIdentifier(identifier, 0) == identifier.length();
}

private static int parseIdentifier(CharSequence identifier, int offset) {
if (identifier == null || identifier.length() <= offset) {
return -1;
}

if (!ParserUtils.isIdentifierStart(identifier.charAt(0))) {
return false;
// Parse the required identifier_start production.
char startingChar = identifier.charAt(offset);
if (startingChar == '_') {
while (identifier.charAt(offset) == '_') {
offset++;
}
if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) {
return -1;
}
offset++;
} else if (!ParserUtils.isAlphabetic(startingChar)) {
return -1;
}

for (int i = 1; i < identifier.length(); i++) {
if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(i))) {
return false;
// Parse the optional identifier_chars production.
while (offset < identifier.length()) {
if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) {
// Return the position of the character that stops the identifier.
// This is either an invalid case (e.g., isValidIdentifier), or
// just the marker needed for isValidNamespace to find '.'.
return offset;
}
offset++;
}

return true;
return offset;
}

/**
Original file line number Diff line number Diff line change
@@ -941,7 +941,8 @@ public void parsesValidVariableAccess() {
"${\nfoo\n}\n",
"${a}",
"${a_b_c}",
"${_}");
"${_a}",
"${__a}");

for (String expr : exprs) {
Selector.parse(expr);
@@ -956,7 +957,9 @@ public void detectsInvalidVariableAccess() {
"${}",
"$}",
"${a",
"${*}");
"${*}",
"${_}",
"${__}");

for (String expr : exprs) {
Assertions.assertThrows(SelectorSyntaxException.class, () -> Selector.parse(expr));
Original file line number Diff line number Diff line change
@@ -170,7 +170,6 @@ public static Collection<Object[]> shapeIdData() {
{ "Mixed.case#name", false},
{ "_foo.baz#name", false},
{ "__foo.baz#name", false},
{ "a._.b#name", false},

// invalid namespaces
{ "#name", true},
@@ -179,6 +178,8 @@ public static Collection<Object[]> shapeIdData() {
{ ".name.space#name", true},
{ "name-space#name", true},
{ "1namespace.foo#name", true},
{ "a._.b#name", true},
{ "a.____.b#name", true},

// valid shape names
{ "ns.foo#shapename", false},
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Parse error at line 3, column 19 near `#\n`: Expected a valid identifier character, but found '#'

metadata abc = __$#
metadata abc = aa$#

namespace foo.baz
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Parse error at line 4, column 11 near `%I`: Expected a valid identifier character, but found '%'
namespace com.foo

structure %Invalid {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Parse error at line 4, column 12 near ` `: Expected a valid identifier character, but found ' '
namespace com.foo

structure _ {}
Original file line number Diff line number Diff line change
@@ -3,4 +3,4 @@ $version: "1.0"

namespace smithy.example

use _$#Bar
use a$#Bar