Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide suggestions for invalid shape ID targets #466

Merged
merged 1 commit into from
Jun 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@

import static java.lang.String.format;

import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import software.amazon.smithy.model.Model;
Expand All @@ -38,12 +40,14 @@
import software.amazon.smithy.utils.FunctionalUtils;
import software.amazon.smithy.utils.OptionalUtils;
import software.amazon.smithy.utils.SetUtils;
import software.amazon.smithy.utils.StringUtils;

/**
* Validates that neighbors target resolvable shapes of the correct type.
*/
public final class TargetValidator extends AbstractValidator {

private static final int MAX_EDIT_DISTANCE_FOR_SUGGESTIONS = 2;
private static final Set<ShapeType> INVALID_MEMBER_TARGETS = SetUtils.of(
ShapeType.SERVICE, ShapeType.RESOURCE, ShapeType.OPERATION, ShapeType.MEMBER);

Expand All @@ -61,7 +65,7 @@ private Stream<ValidationEvent> validateShape(Model model, Shape shape, List<Rel
return OptionalUtils.stream(
validateTarget(model, shape, relationship.getNeighborShape().get(), relationship));
} else {
return Stream.of(unresolvedTarget(shape, relationship));
return Stream.of(unresolvedTarget(model, shape, relationship));
}
});
}
Expand Down Expand Up @@ -148,17 +152,60 @@ private Optional<ValidationEvent> validateIdentifier(Shape shape, Shape target)
}
}

private ValidationEvent unresolvedTarget(Shape shape, Relationship rel) {
private ValidationEvent unresolvedTarget(Model model, Shape shape, Relationship rel) {
// Detect if there are shape IDs within 2 characters edit distance from the
// invalid shape ID target. An edit distance > 2 seems to give far more false
// positives than are useful.
Collection<String> suggestions = computeTargetSuggestions(model, rel.getNeighborShapeId());
String suggestionText = !suggestions.isEmpty()
? ". Did you mean " + String.join(", ", suggestions) + "?"
: "";

if (rel.getRelationshipType() == RelationshipType.MEMBER_TARGET) {
// Don't show the relationship type for invalid member targets.
return error(shape, String.format(
"member shape targets an unresolved shape `%s`", rel.getNeighborShapeId()));
"member shape targets an unresolved shape `%s`%s", rel.getNeighborShapeId(), suggestionText));
} else {
// Use "a" or "an" depending on if the relationship starts with a vowel.
String indefiniteArticle = isUppercaseVowel(rel.getRelationshipType().toString().charAt(0))
? "an"
: "a";
return error(shape, String.format(
"%s shape has a `%s` relationship to an unresolved shape `%s`",
"%s shape has %s `%s` relationship to an unresolved shape `%s`%s",
shape.getType(),
indefiniteArticle,
rel.getRelationshipType().toString().toLowerCase(Locale.US),
rel.getNeighborShapeId()));
rel.getNeighborShapeId(),
suggestionText));
}
}

private Collection<String> computeTargetSuggestions(Model model, ShapeId target) {
String targetString = target.toString();
int floor = Integer.MAX_VALUE;
// Sort the result to create deterministic error messages.
Set<String> candidates = new TreeSet<>();

for (Shape shape : model.toSet()) {
String idString = shape.getId().toString();
int distance = StringUtils.levenshteinDistance(targetString, idString, MAX_EDIT_DISTANCE_FOR_SUGGESTIONS);
if (distance == floor) {
// Add to the list of candidates that have the same distance.
candidates.add(idString);
} else if (distance > -1 && distance < floor) {
// Found a new ID that has a lower distance. Set the new floor,
// clear out the previous candidates, and add this ID.
floor = distance;
candidates.clear();
candidates.add(idString);
}
}

return candidates;
}

private static boolean isUppercaseVowel(char c) {
return c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U';
}

private ValidationEvent badType(Shape shape, Shape target, RelationshipType rel, ShapeType valid) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[ERROR] ns.foo#InvalidList1$member: member shape targets an unresolved shape `ns.foo#d`. Did you mean ns.foo#a, ns.foo#b, ns.foo#c? | Target
[ERROR] ns.foo#InvalidList2$member: member shape targets an unresolved shape `ns.foo#dd`. Did you mean ns.foo#a, ns.foo#ab, ns.foo#b, ns.foo#c? | Target
[ERROR] ns.foo#InvalidList3$member: member shape targets an unresolved shape `ns.foo#acb`. Did you mean ns.foo#ab? | Target
[ERROR] ns.foo#Operation: operation shape has an `input` relationship to an unresolved shape `ns.foo#abcd`. Did you mean ns.foo#abc? | Target
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"smithy": "1.0",
"metadata": {
"suppressions": [
{"id": "UnreferencedShape", "namespace": "ns.foo"}
]
},
"shapes": {
"ns.foo#a": {
"type": "string"
},
"ns.foo#b": {
"type": "string"
},
"ns.foo#c": {
"type": "string"
},
"ns.foo#ab": {
"type": "string"
},
"ns.foo#abc": {
"type": "string"
},
"ns.foo#InvalidList1": {
"type": "list",
"member": {
"target": "ns.foo#d"
}
},
"ns.foo#InvalidList2": {
"type": "list",
"member": {
"target": "ns.foo#dd"
}
},
"ns.foo#InvalidList3": {
"type": "list",
"member": {
"target": "ns.foo#acb"
}
},
"ns.foo#Operation": {
"type": "operation",
"input": {
"target": "ns.foo#abcd"
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[ERROR] ns.foo#InalidOperationInputOutputErrorBadTypes: operation shape `error` relationships must target a structure shape, but found (integer: `ns.foo#Integer`) | Target
[ERROR] ns.foo#InalidOperationInputOutputErrorBadTypes: operation shape `input` relationships must target a structure shape, but found (integer: `ns.foo#Integer`) | Target
[ERROR] ns.foo#InalidOperationInputOutputErrorBadTypes: operation shape `output` relationships must target a structure shape, but found (integer: `ns.foo#Integer`) | Target
[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has a `error` relationship to an unresolved shape `ns.foo#NotFound` | Target
[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has a `input` relationship to an unresolved shape `ns.foo#NotFound` | Target
[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has a `output` relationship to an unresolved shape `ns.foo#NotFound` | Target
[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has an `error` relationship to an unresolved shape `ns.foo#NotFound` | Target
[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has an `input` relationship to an unresolved shape `ns.foo#NotFound` | Target
[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has an `output` relationship to an unresolved shape `ns.foo#NotFound` | Target
[ERROR] ns.foo#InvalidListMemberMember$member: Members cannot target member shapes, but found (member: `ns.foo#ValidInput$integer`) | Target
[ERROR] ns.foo#InvalidListMemberReference$member: member shape targets an unresolved shape `ns.foo#NotFound` | Target
[ERROR] ns.foo#InvalidListMemberResource$member: Members cannot target resource shapes, but found (resource: `ns.foo#MyResource`) | Target
Expand All @@ -14,7 +14,7 @@
[ERROR] ns.foo#InvalidOperationBadErrorTraits: Operation output targets an invalid structure `ns.foo#ValidError` that is marked with the `error` trait. | Target
[ERROR] ns.foo#InvalidResourceBindingReference: resource shape has a `resource` relationship to an unresolved shape `ns.foo#NotFound` | Target
[ERROR] ns.foo#InvalidResourceBindingType: resource shape `resource` relationships must target a resource shape, but found (integer: `ns.foo#Integer`) | Target
[ERROR] ns.foo#InvalidResourceIdentifierReference: resource shape has a `identifier` relationship to an unresolved shape `ns.foo#NotFound` | Target
[ERROR] ns.foo#InvalidResourceIdentifierReference: resource shape has an `identifier` relationship to an unresolved shape `ns.foo#NotFound` | Target
[ERROR] ns.foo#InvalidResourceIdentifierType: resource shape `identifier` relationships must target a string shape, but found (integer: `ns.foo#Integer`) | Target
[ERROR] ns.foo#InvalidResourceLifecycle: Resource create lifecycle operation must target an operation, but found (integer: `ns.foo#Integer`) | Target
[ERROR] ns.foo#InvalidResourceLifecycle: Resource delete lifecycle operation must target an operation, but found (integer: `ns.foo#Integer`) | Target
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

package software.amazon.smithy.utils;

import java.util.Arrays;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -1429,4 +1430,179 @@ public static String escapeJavaString(Object object, String indent) {
result.append('"');
return result.toString();
}

/**
* Find the Levenshtein distance between two CharSequences if it's less than or
* equal to a given threshold.
*
* <p>This code is a copy of the {@code limitedCompare} method from
* Apache commons-text LevenshteinDistance.java.
*
* <p>
* This implementation follows from Algorithms on Strings, Trees and
* Sequences by Dan Gusfield and Chas Emerick's implementation of the
* Levenshtein distance algorithm from <a
* href="http://www.merriampark.com/ld.htm"
* >http://www.merriampark.com/ld.htm</a>
* </p>
*
* <pre>
* limitedCompare(null, *, *) = IllegalArgumentException
* limitedCompare(*, null, *) = IllegalArgumentException
* limitedCompare(*, *, -1) = IllegalArgumentException
* limitedCompare("","", 0) = 0
* limitedCompare("aaapppp", "", 8) = 7
* limitedCompare("aaapppp", "", 7) = 7
* limitedCompare("aaapppp", "", 6)) = -1
* limitedCompare("elephant", "hippo", 7) = 7
* limitedCompare("elephant", "hippo", 6) = -1
* limitedCompare("hippo", "elephant", 7) = 7
* limitedCompare("hippo", "elephant", 6) = -1
* </pre>
*
* @param left the first CharSequence, must not be null
* @param right the second CharSequence, must not be null
* @param threshold the target threshold, must not be negative
* @return result distance, or -1
* @see <a href="https://github.com/apache/commons-text/blob/master/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java">LevenshteinDistance.java</a>
*/
public static int levenshteinDistance(CharSequence left, CharSequence right, final int threshold) {
if (left == null || right == null) {
throw new IllegalArgumentException("CharSequences must not be null");
}
if (threshold < 0) {
throw new IllegalArgumentException("Threshold must not be negative");
}

/*
* This implementation only computes the distance if it's less than or
* equal to the threshold value, returning -1 if it's greater. The
* advantage is performance: unbounded distance is O(nm), but a bound of
* k allows us to reduce it to O(km) time by only computing a diagonal
* stripe of width 2k + 1 of the cost table. It is also possible to use
* this to compute the unbounded Levenshtein distance by starting the
* threshold at 1 and doubling each time until the distance is found;
* this is O(dm), where d is the distance.
*
* One subtlety comes from needing to ignore entries on the border of
* our stripe eg. p[] = |#|#|#|* d[] = *|#|#|#| We must ignore the entry
* to the left of the leftmost member We must ignore the entry above the
* rightmost member
*
* Another subtlety comes from our stripe running off the matrix if the
* strings aren't of the same size. Since string s is always swapped to
* be the shorter of the two, the stripe will always run off to the
* upper right instead of the lower left of the matrix.
*
* As a concrete example, suppose s is of length 5, t is of length 7,
* and our threshold is 1. In this case we're going to walk a stripe of
* length 3. The matrix would look like so:
*
* <pre>
* 1 2 3 4 5
* 1 |#|#| | | |
* 2 |#|#|#| | |
* 3 | |#|#|#| |
* 4 | | |#|#|#|
* 5 | | | |#|#|
* 6 | | | | |#|
* 7 | | | | | |
* </pre>
*
* Note how the stripe leads off the table as there is no possible way
* to turn a string of length 5 into one of length 7 in edit distance of
* 1.
*
* Additionally, this implementation decreases memory usage by using two
* single-dimensional arrays and swapping them back and forth instead of
* allocating an entire n by m matrix. This requires a few minor
* changes, such as immediately returning when it's detected that the
* stripe has run off the matrix and initially filling the arrays with
* large values so that entries we don't compute are ignored.
*
* See Algorithms on Strings, Trees and Sequences by Dan Gusfield for
* some discussion.
*/

int n = left.length(); // length of left
int m = right.length(); // length of right

// if one string is empty, the edit distance is necessarily the length
// of the other
if (n == 0) {
return m <= threshold ? m : -1;
} else if (m == 0) {
return n <= threshold ? n : -1;
}

if (n > m) {
// swap the two strings to consume less memory
final CharSequence tmp = left;
left = right;
right = tmp;
n = m;
m = right.length();
}

// the edit distance cannot be less than the length difference
if (m - n > threshold) {
return -1;
}

int[] p = new int[n + 1]; // 'previous' cost array, horizontally
int[] d = new int[n + 1]; // cost array, horizontally
int[] tempD; // placeholder to assist in swapping p and d

// fill in starting table values
final int boundary = Math.min(n, threshold) + 1;
for (int i = 0; i < boundary; i++) {
p[i] = i;
}
// these fills ensure that the value above the rightmost entry of our
// stripe will be ignored in following loop iterations
Arrays.fill(p, boundary, p.length, Integer.MAX_VALUE);
Arrays.fill(d, Integer.MAX_VALUE);

// iterates through t
for (int j = 1; j <= m; j++) {
final char rightJ = right.charAt(j - 1); // jth character of right
d[0] = j;

// compute stripe indices, constrain to array size
final int min = Math.max(1, j - threshold);
final int max = j > Integer.MAX_VALUE - threshold ? n : Math.min(
n, j + threshold);

// ignore entry left of leftmost
if (min > 1) {
d[min - 1] = Integer.MAX_VALUE;
}

// iterates through [min, max] in s
for (int i = min; i <= max; i++) {
if (left.charAt(i - 1) == rightJ) {
// diagonally left and up
d[i] = p[i - 1];
} else {
// 1 + minimum of cell to the left, to the top, diagonally
// left and up
d[i] = 1 + Math.min(Math.min(d[i - 1], p[i]), p[i - 1]);
}
}

// copy current distance counts to 'previous row' distance counts
tempD = p;
p = d;
d = tempD;
}

// if p[n] is greater than the threshold, there's no guarantee on it
// being the correct
// distance
if (p[n] <= threshold) {
return p[n];
}

return -1;
}
}