From 95e0dc6061f32bea64ceb7eff3f262e69a35528b Mon Sep 17 00:00:00 2001 From: Michael Dowling Date: Fri, 12 Jun 2020 23:46:39 -0700 Subject: [PATCH] Provide suggestions for invalid shape ID targets When an invalid shape ID target is encountered, a "Did you mean X?" list of shape IDs are suggested if any shape IDs in a model have a Levenshtein edit distance of less than 2 characters. The shape IDs with the lowest distance from the invalid shape ID are suggested. Closes #464. This change also updates the error message for "shape has a/an `X` relationship..." so that is uses the appropriate indefinte articale based on if the relation type starts with a vowel. --- .../validators/TargetValidator.java | 57 +++++- .../target-validator-suggestions.errors | 4 + .../target-validator-suggestions.json | 49 +++++ .../validators/target-validator.errors | 8 +- .../amazon/smithy/utils/StringUtils.java | 176 ++++++++++++++++++ 5 files changed, 285 insertions(+), 9 deletions(-) create mode 100644 smithy-model/src/test/resources/software/amazon/smithy/model/errorfiles/validators/target-validator-suggestions.errors create mode 100644 smithy-model/src/test/resources/software/amazon/smithy/model/errorfiles/validators/target-validator-suggestions.json diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/validation/validators/TargetValidator.java b/smithy-model/src/main/java/software/amazon/smithy/model/validation/validators/TargetValidator.java index 24c76fa5e8c..cdee5a86163 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/validation/validators/TargetValidator.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/validation/validators/TargetValidator.java @@ -17,10 +17,12 @@ import static java.lang.String.format; +import java.util.Collection; import java.util.List; import java.util.Locale; import java.util.Optional; import java.util.Set; +import java.util.TreeSet; import java.util.stream.Collectors; import java.util.stream.Stream; import software.amazon.smithy.model.Model; @@ -38,12 +40,14 @@ import software.amazon.smithy.utils.FunctionalUtils; import software.amazon.smithy.utils.OptionalUtils; import software.amazon.smithy.utils.SetUtils; +import software.amazon.smithy.utils.StringUtils; /** * Validates that neighbors target resolvable shapes of the correct type. */ public final class TargetValidator extends AbstractValidator { + private static final int MAX_EDIT_DISTANCE_FOR_SUGGESTIONS = 2; private static final Set INVALID_MEMBER_TARGETS = SetUtils.of( ShapeType.SERVICE, ShapeType.RESOURCE, ShapeType.OPERATION, ShapeType.MEMBER); @@ -61,7 +65,7 @@ private Stream validateShape(Model model, Shape shape, List validateIdentifier(Shape shape, Shape target) } } - private ValidationEvent unresolvedTarget(Shape shape, Relationship rel) { + private ValidationEvent unresolvedTarget(Model model, Shape shape, Relationship rel) { + // Detect if there are shape IDs within 2 characters edit distance from the + // invalid shape ID target. An edit distance > 2 seems to give far more false + // positives than are useful. + Collection suggestions = computeTargetSuggestions(model, rel.getNeighborShapeId()); + String suggestionText = !suggestions.isEmpty() + ? ". Did you mean " + String.join(", ", suggestions) + "?" + : ""; + if (rel.getRelationshipType() == RelationshipType.MEMBER_TARGET) { + // Don't show the relationship type for invalid member targets. return error(shape, String.format( - "member shape targets an unresolved shape `%s`", rel.getNeighborShapeId())); + "member shape targets an unresolved shape `%s`%s", rel.getNeighborShapeId(), suggestionText)); } else { + // Use "a" or "an" depending on if the relationship starts with a vowel. + String indefiniteArticle = isUppercaseVowel(rel.getRelationshipType().toString().charAt(0)) + ? "an" + : "a"; return error(shape, String.format( - "%s shape has a `%s` relationship to an unresolved shape `%s`", + "%s shape has %s `%s` relationship to an unresolved shape `%s`%s", shape.getType(), + indefiniteArticle, rel.getRelationshipType().toString().toLowerCase(Locale.US), - rel.getNeighborShapeId())); + rel.getNeighborShapeId(), + suggestionText)); + } + } + + private Collection computeTargetSuggestions(Model model, ShapeId target) { + String targetString = target.toString(); + int floor = Integer.MAX_VALUE; + // Sort the result to create deterministic error messages. + Set candidates = new TreeSet<>(); + + for (Shape shape : model.toSet()) { + String idString = shape.getId().toString(); + int distance = StringUtils.levenshteinDistance(targetString, idString, MAX_EDIT_DISTANCE_FOR_SUGGESTIONS); + if (distance == floor) { + // Add to the list of candidates that have the same distance. + candidates.add(idString); + } else if (distance > -1 && distance < floor) { + // Found a new ID that has a lower distance. Set the new floor, + // clear out the previous candidates, and add this ID. + floor = distance; + candidates.clear(); + candidates.add(idString); + } } + + return candidates; + } + + private static boolean isUppercaseVowel(char c) { + return c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U'; } private ValidationEvent badType(Shape shape, Shape target, RelationshipType rel, ShapeType valid) { diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/errorfiles/validators/target-validator-suggestions.errors b/smithy-model/src/test/resources/software/amazon/smithy/model/errorfiles/validators/target-validator-suggestions.errors new file mode 100644 index 00000000000..3204393d867 --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/errorfiles/validators/target-validator-suggestions.errors @@ -0,0 +1,4 @@ +[ERROR] ns.foo#InvalidList1$member: member shape targets an unresolved shape `ns.foo#d`. Did you mean ns.foo#a, ns.foo#b, ns.foo#c? | Target +[ERROR] ns.foo#InvalidList2$member: member shape targets an unresolved shape `ns.foo#dd`. Did you mean ns.foo#a, ns.foo#ab, ns.foo#b, ns.foo#c? | Target +[ERROR] ns.foo#InvalidList3$member: member shape targets an unresolved shape `ns.foo#acb`. Did you mean ns.foo#ab? | Target +[ERROR] ns.foo#Operation: operation shape has an `input` relationship to an unresolved shape `ns.foo#abcd`. Did you mean ns.foo#abc? | Target diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/errorfiles/validators/target-validator-suggestions.json b/smithy-model/src/test/resources/software/amazon/smithy/model/errorfiles/validators/target-validator-suggestions.json new file mode 100644 index 00000000000..0b0945ee2bc --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/errorfiles/validators/target-validator-suggestions.json @@ -0,0 +1,49 @@ +{ + "smithy": "1.0", + "metadata": { + "suppressions": [ + {"id": "UnreferencedShape", "namespace": "ns.foo"} + ] + }, + "shapes": { + "ns.foo#a": { + "type": "string" + }, + "ns.foo#b": { + "type": "string" + }, + "ns.foo#c": { + "type": "string" + }, + "ns.foo#ab": { + "type": "string" + }, + "ns.foo#abc": { + "type": "string" + }, + "ns.foo#InvalidList1": { + "type": "list", + "member": { + "target": "ns.foo#d" + } + }, + "ns.foo#InvalidList2": { + "type": "list", + "member": { + "target": "ns.foo#dd" + } + }, + "ns.foo#InvalidList3": { + "type": "list", + "member": { + "target": "ns.foo#acb" + } + }, + "ns.foo#Operation": { + "type": "operation", + "input": { + "target": "ns.foo#abcd" + } + } + } +} diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/errorfiles/validators/target-validator.errors b/smithy-model/src/test/resources/software/amazon/smithy/model/errorfiles/validators/target-validator.errors index 98402235a9c..d643adce48c 100644 --- a/smithy-model/src/test/resources/software/amazon/smithy/model/errorfiles/validators/target-validator.errors +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/errorfiles/validators/target-validator.errors @@ -1,9 +1,9 @@ [ERROR] ns.foo#InalidOperationInputOutputErrorBadTypes: operation shape `error` relationships must target a structure shape, but found (integer: `ns.foo#Integer`) | Target [ERROR] ns.foo#InalidOperationInputOutputErrorBadTypes: operation shape `input` relationships must target a structure shape, but found (integer: `ns.foo#Integer`) | Target [ERROR] ns.foo#InalidOperationInputOutputErrorBadTypes: operation shape `output` relationships must target a structure shape, but found (integer: `ns.foo#Integer`) | Target -[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has a `error` relationship to an unresolved shape `ns.foo#NotFound` | Target -[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has a `input` relationship to an unresolved shape `ns.foo#NotFound` | Target -[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has a `output` relationship to an unresolved shape `ns.foo#NotFound` | Target +[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has an `error` relationship to an unresolved shape `ns.foo#NotFound` | Target +[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has an `input` relationship to an unresolved shape `ns.foo#NotFound` | Target +[ERROR] ns.foo#InalidOperationInputOutputErrorNotFound: operation shape has an `output` relationship to an unresolved shape `ns.foo#NotFound` | Target [ERROR] ns.foo#InvalidListMemberMember$member: Members cannot target member shapes, but found (member: `ns.foo#ValidInput$integer`) | Target [ERROR] ns.foo#InvalidListMemberReference$member: member shape targets an unresolved shape `ns.foo#NotFound` | Target [ERROR] ns.foo#InvalidListMemberResource$member: Members cannot target resource shapes, but found (resource: `ns.foo#MyResource`) | Target @@ -14,7 +14,7 @@ [ERROR] ns.foo#InvalidOperationBadErrorTraits: Operation output targets an invalid structure `ns.foo#ValidError` that is marked with the `error` trait. | Target [ERROR] ns.foo#InvalidResourceBindingReference: resource shape has a `resource` relationship to an unresolved shape `ns.foo#NotFound` | Target [ERROR] ns.foo#InvalidResourceBindingType: resource shape `resource` relationships must target a resource shape, but found (integer: `ns.foo#Integer`) | Target -[ERROR] ns.foo#InvalidResourceIdentifierReference: resource shape has a `identifier` relationship to an unresolved shape `ns.foo#NotFound` | Target +[ERROR] ns.foo#InvalidResourceIdentifierReference: resource shape has an `identifier` relationship to an unresolved shape `ns.foo#NotFound` | Target [ERROR] ns.foo#InvalidResourceIdentifierType: resource shape `identifier` relationships must target a string shape, but found (integer: `ns.foo#Integer`) | Target [ERROR] ns.foo#InvalidResourceLifecycle: Resource create lifecycle operation must target an operation, but found (integer: `ns.foo#Integer`) | Target [ERROR] ns.foo#InvalidResourceLifecycle: Resource delete lifecycle operation must target an operation, but found (integer: `ns.foo#Integer`) | Target diff --git a/smithy-utils/src/main/java/software/amazon/smithy/utils/StringUtils.java b/smithy-utils/src/main/java/software/amazon/smithy/utils/StringUtils.java index 6157e9ec809..f07c658f43b 100644 --- a/smithy-utils/src/main/java/software/amazon/smithy/utils/StringUtils.java +++ b/smithy-utils/src/main/java/software/amazon/smithy/utils/StringUtils.java @@ -15,6 +15,7 @@ package software.amazon.smithy.utils; +import java.util.Arrays; import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -1429,4 +1430,179 @@ public static String escapeJavaString(Object object, String indent) { result.append('"'); return result.toString(); } + + /** + * Find the Levenshtein distance between two CharSequences if it's less than or + * equal to a given threshold. + * + *

This code is a copy of the {@code limitedCompare} method from + * Apache commons-text LevenshteinDistance.java. + * + *

+ * This implementation follows from Algorithms on Strings, Trees and + * Sequences by Dan Gusfield and Chas Emerick's implementation of the + * Levenshtein distance algorithm from http://www.merriampark.com/ld.htm + *

+ * + *
+     * limitedCompare(null, *, *)             = IllegalArgumentException
+     * limitedCompare(*, null, *)             = IllegalArgumentException
+     * limitedCompare(*, *, -1)               = IllegalArgumentException
+     * limitedCompare("","", 0)               = 0
+     * limitedCompare("aaapppp", "", 8)       = 7
+     * limitedCompare("aaapppp", "", 7)       = 7
+     * limitedCompare("aaapppp", "", 6))      = -1
+     * limitedCompare("elephant", "hippo", 7) = 7
+     * limitedCompare("elephant", "hippo", 6) = -1
+     * limitedCompare("hippo", "elephant", 7) = 7
+     * limitedCompare("hippo", "elephant", 6) = -1
+     * 
+ * + * @param left the first CharSequence, must not be null + * @param right the second CharSequence, must not be null + * @param threshold the target threshold, must not be negative + * @return result distance, or -1 + * @see LevenshteinDistance.java + */ + public static int levenshteinDistance(CharSequence left, CharSequence right, final int threshold) { + if (left == null || right == null) { + throw new IllegalArgumentException("CharSequences must not be null"); + } + if (threshold < 0) { + throw new IllegalArgumentException("Threshold must not be negative"); + } + + /* + * This implementation only computes the distance if it's less than or + * equal to the threshold value, returning -1 if it's greater. The + * advantage is performance: unbounded distance is O(nm), but a bound of + * k allows us to reduce it to O(km) time by only computing a diagonal + * stripe of width 2k + 1 of the cost table. It is also possible to use + * this to compute the unbounded Levenshtein distance by starting the + * threshold at 1 and doubling each time until the distance is found; + * this is O(dm), where d is the distance. + * + * One subtlety comes from needing to ignore entries on the border of + * our stripe eg. p[] = |#|#|#|* d[] = *|#|#|#| We must ignore the entry + * to the left of the leftmost member We must ignore the entry above the + * rightmost member + * + * Another subtlety comes from our stripe running off the matrix if the + * strings aren't of the same size. Since string s is always swapped to + * be the shorter of the two, the stripe will always run off to the + * upper right instead of the lower left of the matrix. + * + * As a concrete example, suppose s is of length 5, t is of length 7, + * and our threshold is 1. In this case we're going to walk a stripe of + * length 3. The matrix would look like so: + * + *
+         *    1 2 3 4 5
+         * 1 |#|#| | | |
+         * 2 |#|#|#| | |
+         * 3 | |#|#|#| |
+         * 4 | | |#|#|#|
+         * 5 | | | |#|#|
+         * 6 | | | | |#|
+         * 7 | | | | | |
+         * 
+ * + * Note how the stripe leads off the table as there is no possible way + * to turn a string of length 5 into one of length 7 in edit distance of + * 1. + * + * Additionally, this implementation decreases memory usage by using two + * single-dimensional arrays and swapping them back and forth instead of + * allocating an entire n by m matrix. This requires a few minor + * changes, such as immediately returning when it's detected that the + * stripe has run off the matrix and initially filling the arrays with + * large values so that entries we don't compute are ignored. + * + * See Algorithms on Strings, Trees and Sequences by Dan Gusfield for + * some discussion. + */ + + int n = left.length(); // length of left + int m = right.length(); // length of right + + // if one string is empty, the edit distance is necessarily the length + // of the other + if (n == 0) { + return m <= threshold ? m : -1; + } else if (m == 0) { + return n <= threshold ? n : -1; + } + + if (n > m) { + // swap the two strings to consume less memory + final CharSequence tmp = left; + left = right; + right = tmp; + n = m; + m = right.length(); + } + + // the edit distance cannot be less than the length difference + if (m - n > threshold) { + return -1; + } + + int[] p = new int[n + 1]; // 'previous' cost array, horizontally + int[] d = new int[n + 1]; // cost array, horizontally + int[] tempD; // placeholder to assist in swapping p and d + + // fill in starting table values + final int boundary = Math.min(n, threshold) + 1; + for (int i = 0; i < boundary; i++) { + p[i] = i; + } + // these fills ensure that the value above the rightmost entry of our + // stripe will be ignored in following loop iterations + Arrays.fill(p, boundary, p.length, Integer.MAX_VALUE); + Arrays.fill(d, Integer.MAX_VALUE); + + // iterates through t + for (int j = 1; j <= m; j++) { + final char rightJ = right.charAt(j - 1); // jth character of right + d[0] = j; + + // compute stripe indices, constrain to array size + final int min = Math.max(1, j - threshold); + final int max = j > Integer.MAX_VALUE - threshold ? n : Math.min( + n, j + threshold); + + // ignore entry left of leftmost + if (min > 1) { + d[min - 1] = Integer.MAX_VALUE; + } + + // iterates through [min, max] in s + for (int i = min; i <= max; i++) { + if (left.charAt(i - 1) == rightJ) { + // diagonally left and up + d[i] = p[i - 1]; + } else { + // 1 + minimum of cell to the left, to the top, diagonally + // left and up + d[i] = 1 + Math.min(Math.min(d[i - 1], p[i]), p[i - 1]); + } + } + + // copy current distance counts to 'previous row' distance counts + tempD = p; + p = d; + d = tempD; + } + + // if p[n] is greater than the threshold, there's no guarantee on it + // being the correct + // distance + if (p[n] <= threshold) { + return p[n]; + } + + return -1; + } }