Skip to content

Commit

Permalink
Add rudimentary form of error recovery to parser
Browse files Browse the repository at this point in the history
When we encounter a parser error, we'll now attempt some basic
error recovery by simply skipping tokens until we reach a token at
the start of a line that is an identifier, documentation comment,
or '@'. This should help with things like LSPs, which should still be
able to utilize jump to definition even with a broken or in-progress
model.
  • Loading branch information
mtdowling committed Apr 14, 2023
1 parent 7080189 commit a4c906d
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -415,51 +415,85 @@ private void parseApplyStatement() {

private void parseFirstShapeStatement(SourceLocation possibleDocCommentLocation) {
if (tokenizer.getCurrentToken() != IdlToken.EOF) {
if (tokenizer.doesCurrentIdentifierStartWith('a')) {
parseApplyStatement();
} else {
List<IdlTraitParser.Result> traits;
boolean hasDocComment = tokenizer.getCurrentToken() == IdlToken.DOC_COMMENT;

if (possibleDocCommentLocation == null) {
traits = IdlTraitParser.parseDocsAndTraitsBeforeShape(tokenizer, resolver);
try {
if (tokenizer.doesCurrentIdentifierStartWith('a')) {
parseApplyStatement();
} else {
// In this case, this is the first shape encountered for a model file that doesn't have any
// use statements. We need to take the previously skipped documentation comments to parse
// potential use statements and apply them to this first shape.
String docLines = tokenizer.removePendingDocCommentLines();
traits = IdlTraitParser.parseDocsAndTraitsBeforeShape(tokenizer, resolver);
// Note that possibleDocCommentLocation is just a mark of where docs _could be_.
if (docLines != null) {
hasDocComment = true;
traits.add(new IdlTraitParser.Result(DocumentationTrait.ID.toString(),
new StringNode(docLines, possibleDocCommentLocation),
IdlTraitParser.TraitType.DOC_COMMENT));
List<IdlTraitParser.Result> traits;
boolean hasDocComment = tokenizer.getCurrentToken() == IdlToken.DOC_COMMENT;

if (possibleDocCommentLocation == null) {
traits = IdlTraitParser.parseDocsAndTraitsBeforeShape(tokenizer, resolver);
} else {
// In this case, this is the first shape encountered for a model file that doesn't have any
// use statements. We need to take the previously skipped documentation comments to parse
// potential use statements and apply them to this first shape.
String docLines = tokenizer.removePendingDocCommentLines();
traits = IdlTraitParser.parseDocsAndTraitsBeforeShape(tokenizer, resolver);
// Note that possibleDocCommentLocation is just a mark of where docs _could be_.
if (docLines != null) {
hasDocComment = true;
traits.add(new IdlTraitParser.Result(DocumentationTrait.ID.toString(),
new StringNode(docLines, possibleDocCommentLocation),
IdlTraitParser.TraitType.DOC_COMMENT));
}
}
}

if (parseShapeDefinition(traits, hasDocComment)) {
parseShape(traits);
if (parseShapeDefinition(traits, hasDocComment)) {
parseShape(traits);
}
}
} catch (ModelSyntaxException e) {
errorRecovery(e);
}
}
}

private void parseSubsequentShapeStatements() {
while (tokenizer.getCurrentToken() != IdlToken.EOF) {
if (tokenizer.doesCurrentIdentifierStartWith('a')) {
parseApplyStatement();
} else {
List<IdlTraitParser.Result> traits;
boolean hasDocComment = tokenizer.getCurrentToken() == IdlToken.DOC_COMMENT;
traits = IdlTraitParser.parseDocsAndTraitsBeforeShape(tokenizer, resolver);
if (parseShapeDefinition(traits, hasDocComment)) {
parseShape(traits);
while (tokenizer.hasNext()) {
try {
if (tokenizer.doesCurrentIdentifierStartWith('a')) {
parseApplyStatement();
} else {
List<IdlTraitParser.Result> traits;
boolean hasDocComment = tokenizer.getCurrentToken() == IdlToken.DOC_COMMENT;
traits = IdlTraitParser.parseDocsAndTraitsBeforeShape(tokenizer, resolver);
if (parseShapeDefinition(traits, hasDocComment)) {
parseShape(traits);
}
}
} catch (ModelSyntaxException e) {
errorRecovery(e);
}
}
}

/**
 * Performs rudimentary error recovery after a syntax error so that the rest of the model can still be parsed.
 *
 * <p>The error is emitted as a validation event, then tokens are skipped until one is found in column 1
 * that {@code isErrorRecoveryToken} accepts, or until the tokenizer has no more tokens. The model is still
 * invalid and will fail to validate, but things like IDEs should still be able to do things like jump to
 * definition.
 *
 * @param e the syntax error that interrupted parsing.
 * @throws ModelSyntaxException the given exception, rethrown when the tokenizer has no more tokens to scan.
 */
private void errorRecovery(ModelSyntaxException e) {
    // With nothing left to scan there is no point to resynchronize on, so surface the original failure.
    if (!tokenizer.hasNext()) {
        throw e;
    }

    // Report the error as an event rather than aborting, then look for a place to resume parsing.
    emit(ValidationEvent.fromSourceException(e));

    boolean atRecoveryPoint;
    do {
        // Always skip the current token since it was the one that failed.
        IdlToken candidate = tokenizer.next();
        // Only a recovery token that sits at the very start of a line counts as the start of the next statement.
        atRecoveryPoint = tokenizer.getCurrentTokenColumn() == 1 && isErrorRecoveryToken(candidate);
    } while (!atRecoveryPoint && tokenizer.hasNext());
}

// Identifiers, documentation comments, and '@' are good signals that the next shape is starting.
private boolean isErrorRecoveryToken(IdlToken token) {
    if (token == IdlToken.IDENTIFIER) {
        return true;
    }
    if (token == IdlToken.DOC_COMMENT) {
        return true;
    }
    return token == IdlToken.AT;
}

private boolean parseShapeDefinition(List<IdlTraitParser.Result> traits, boolean hasDocComment) {
if (tokenizer.getCurrentToken() != IdlToken.EOF) {
// Continue to parse if not at the end of the file.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ public IdlToken next() {

if (c == SimpleParser.EOF) {
if (emittedEof) {
throw new NoSuchElementException("Expected another token but traversed beyond EOF");
throw new NoSuchElementException("Expected another token but reached EOF");
} else {
emittedEof = true;
currentTokenEnd = parser.position();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -339,4 +339,38 @@ public void setsCorrectLocationForEnum() {
assertThat(fooBarMember.getSourceLocation().getLine(), is(7));
assertThat(fooBarMember.getSourceLocation().getColumn(), is(5));
}

@Test
public void doesBasicErrorRecovery() {
    // The fixture contains a structure with a syntax error, followed by a shape that starts
    // at the beginning of a line and should be picked up once error recovery kicks in.
    ValidatedResult<Model> result = Model.assembler()
            .addImport(getClass().getResource("error-recovery.smithy"))
            .assemble();

    // The model is broken, but a partial model must still be produced.
    assertThat(result.isBroken(), is(true));
    assertThat(result.getResult().isPresent(), is(true));

    Model model = result.getResult().get();

    // Shapes before the error and after the recovery point exist; the broken shape and the
    // identifier consumed while hitting the error do not.
    assertThat(model.getShape(ShapeId.from("smithy.example#MyString")).isPresent(), is(true));
    assertThat(model.getShape(ShapeId.from("smithy.example#MyFooIsBroken")).isPresent(), is(false));
    assertThat(model.getShape(ShapeId.from("smithy.example#MyInteger")).isPresent(), is(false));
    assertThat(model.getShape(ShapeId.from("smithy.example#MyInteger2")).isPresent(), is(true));

    boolean foundSyntax = false;
    boolean foundTrait = false;
    for (ValidationEvent event : result.getValidationEvents()) {
        // Only ERROR events are relevant to either expectation.
        if (event.getSeverity() != Severity.ERROR) {
            continue;
        }
        if (event.getMessage().contains(
                "Syntax error at line 9, column 9: Expected COLON(':') but found IDENTIFIER('MyInteger')")) {
            foundSyntax = true;
        }
        if (event.getMessage().contains("Unable to resolve trait")) {
            foundTrait = true;
        }
    }

    // Both the original syntax error and an error found after recovery must be reported.
    assertThat(foundSyntax, is(true));
    assertThat(foundTrait, is(true));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
$version: "2.0"

namespace smithy.example

string MyString

structure MyFooIsBroken {
// The parser will keep trying to parse here, assuming integer is a key and needs to be followed by ":".
integer MyInteger

// When the above fails, error recovery kicks in, looking for the next token at the start of the line.
@unknown
integer MyInteger2

0 comments on commit a4c906d

Please sign in to comment.