From 2a0022776f9339fef269805dc3a8b47035ccceff Mon Sep 17 00:00:00 2001 From: Michael Dowling Date: Fri, 14 Apr 2023 16:03:46 -0700 Subject: [PATCH] Add rudimentary form of error recovery to parser When we encounter a parser error, we'll now attempt some basic error recovery by simply skipping token until we reach a token at the start of a line that is an identifier, documentation comment, or '@'. This should help with things like LSPs that still should be able to utilize jump to definition even with a broken or in-progress model. --- .../smithy/model/loader/IdlModelLoader.java | 95 +++++++++++++------ .../smithy/model/loader/IdlTokenizer.java | 2 +- .../model/loader/IdlModelLoaderTest.java | 34 +++++++ .../smithy/model/loader/error-recovery.smithy | 13 +++ 4 files changed, 112 insertions(+), 32 deletions(-) create mode 100644 smithy-model/src/test/resources/software/amazon/smithy/model/loader/error-recovery.smithy diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlModelLoader.java b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlModelLoader.java index 187edded716..56f847b3c2f 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlModelLoader.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlModelLoader.java @@ -415,51 +415,84 @@ private void parseApplyStatement() { private void parseFirstShapeStatement(SourceLocation possibleDocCommentLocation) { if (tokenizer.getCurrentToken() != IdlToken.EOF) { - if (tokenizer.doesCurrentIdentifierStartWith('a')) { - parseApplyStatement(); - } else { - List traits; - boolean hasDocComment = tokenizer.getCurrentToken() == IdlToken.DOC_COMMENT; - - if (possibleDocCommentLocation == null) { - traits = IdlTraitParser.parseDocsAndTraitsBeforeShape(tokenizer, resolver); + try { + if (tokenizer.doesCurrentIdentifierStartWith('a')) { + parseApplyStatement(); } else { - // In this case, this is the first shape encountered for a model file that doesn't have any - // use statements. We need to take the previously skipped documentation comments to parse - // potential use statements and apply them to this first shape. - String docLines = tokenizer.removePendingDocCommentLines(); - traits = IdlTraitParser.parseDocsAndTraitsBeforeShape(tokenizer, resolver); - // Note that possibleDocCommentLocation is just a mark of where docs _could be_. - if (docLines != null) { - hasDocComment = true; - traits.add(new IdlTraitParser.Result(DocumentationTrait.ID.toString(), - new StringNode(docLines, possibleDocCommentLocation), - IdlTraitParser.TraitType.DOC_COMMENT)); + List traits; + boolean hasDocComment = tokenizer.getCurrentToken() == IdlToken.DOC_COMMENT; + + if (possibleDocCommentLocation == null) { + traits = IdlTraitParser.parseDocsAndTraitsBeforeShape(tokenizer, resolver); + } else { + // In this case, this is the first shape encountered for a model file that doesn't have any + // use statements. We need to take the previously skipped documentation comments to parse + // potential use statements and apply them to this first shape. + String docLines = tokenizer.removePendingDocCommentLines(); + traits = IdlTraitParser.parseDocsAndTraitsBeforeShape(tokenizer, resolver); + // Note that possibleDocCommentLocation is just a mark of where docs _could be_. + if (docLines != null) { + hasDocComment = true; + traits.add(new IdlTraitParser.Result(DocumentationTrait.ID.toString(), + new StringNode(docLines, possibleDocCommentLocation), + IdlTraitParser.TraitType.DOC_COMMENT)); + } } - } - if (parseShapeDefinition(traits, hasDocComment)) { - parseShape(traits); + if (parseShapeDefinition(traits, hasDocComment)) { + parseShape(traits); + } } + } catch (ModelSyntaxException e) { + errorRecovery(e); } } } private void parseSubsequentShapeStatements() { - while (tokenizer.getCurrentToken() != IdlToken.EOF) { - if (tokenizer.doesCurrentIdentifierStartWith('a')) { - parseApplyStatement(); - } else { - List traits; - boolean hasDocComment = tokenizer.getCurrentToken() == IdlToken.DOC_COMMENT; - traits = IdlTraitParser.parseDocsAndTraitsBeforeShape(tokenizer, resolver); - if (parseShapeDefinition(traits, hasDocComment)) { - parseShape(traits); + while (tokenizer.hasNext()) { + try { + if (tokenizer.doesCurrentIdentifierStartWith('a')) { + parseApplyStatement(); + } else { + List traits; + boolean hasDocComment = tokenizer.getCurrentToken() == IdlToken.DOC_COMMENT; + traits = IdlTraitParser.parseDocsAndTraitsBeforeShape(tokenizer, resolver); + if (parseShapeDefinition(traits, hasDocComment)) { + parseShape(traits); + } } + } catch (ModelSyntaxException e) { + errorRecovery(e); } } } + private void errorRecovery(ModelSyntaxException e) { + if (!tokenizer.hasNext()) { + throw e; + } + + // Here we do rudimentary error recovery to attempt to make sense of the remaining model. + // We do this by scanning tokens until we find the next identifier, docs, or trait at the start of a line. + // The model is still invalid and will fail to validate, but things like IDEs should still be able to do + // things like goto definition. + emit(ValidationEvent.fromSourceException(e)); + + do { + // Always skip the current token since it was the one that failed. + IdlToken token = tokenizer.next(); + if (tokenizer.getCurrentTokenColumn() == 1 && isErrorRecoveryToken(token)) { + break; + } + } while (tokenizer.hasNext()); + } + + // These tokens are good signals that the next shape is starting. + private boolean isErrorRecoveryToken(IdlToken token) { + return token == IdlToken.IDENTIFIER || token == IdlToken.DOC_COMMENT || token == IdlToken.AT; + } + private boolean parseShapeDefinition(List traits, boolean hasDocComment) { if (tokenizer.getCurrentToken() != IdlToken.EOF) { // Continue to parse if not at the end of the file. diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTokenizer.java b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTokenizer.java index 3cdc9071671..4b9b828397c 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTokenizer.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTokenizer.java @@ -321,7 +321,7 @@ public IdlToken next() { switch (c) { case SimpleParser.EOF: if (currentTokenType == IdlToken.EOF) { - throw new NoSuchElementException("Expected another token but traversed beyond EOF"); + throw new NoSuchElementException("Expected another token but reached EOF"); } currentTokenEnd = parser.position(); return currentTokenType = IdlToken.EOF; diff --git a/smithy-model/src/test/java/software/amazon/smithy/model/loader/IdlModelLoaderTest.java b/smithy-model/src/test/java/software/amazon/smithy/model/loader/IdlModelLoaderTest.java index cf4f5ff9470..0002a59e71f 100644 --- a/smithy-model/src/test/java/software/amazon/smithy/model/loader/IdlModelLoaderTest.java +++ b/smithy-model/src/test/java/software/amazon/smithy/model/loader/IdlModelLoaderTest.java @@ -339,4 +339,38 @@ public void setsCorrectLocationForEnum() { assertThat(fooBarMember.getSourceLocation().getLine(), is(7)); assertThat(fooBarMember.getSourceLocation().getColumn(), is(5)); } + + @Test + public void doesBasicErrorRecovery() { + ValidatedResult result = Model.assembler() + .addImport(getClass().getResource("error-recovery.smithy")) + .assemble(); + + assertThat(result.isBroken(), is(true)); + assertThat(result.getResult().isPresent(), is(true)); + + Model model = result.getResult().get(); + + assertThat(model.getShape(ShapeId.from("smithy.example#MyString")).isPresent(), is(true)); + assertThat(model.getShape(ShapeId.from("smithy.example#MyFooIsBroken")).isPresent(), is(false)); + assertThat(model.getShape(ShapeId.from("smithy.example#MyInteger")).isPresent(), is(false)); + assertThat(model.getShape(ShapeId.from("smithy.example#MyInteger2")).isPresent(), is(true)); + + System.out.println(result.getValidationEvents()); + + boolean foundSyntax = false; + boolean foundTrait = false; + for (ValidationEvent e : result.getValidationEvents()) { + if (e.getSeverity() == Severity.ERROR && e.getMessage().contains( + "Syntax error at line 9, column 9: Expected COLON(':') but found IDENTIFIER('MyInteger')")) { + foundSyntax = true; + } + if (e.getSeverity() == Severity.ERROR && e.getMessage().contains("Unable to resolve trait")) { + foundTrait = true; + } + } + + assertThat(foundSyntax, is(true)); + assertThat(foundTrait, is(true)); + } } diff --git a/smithy-model/src/test/resources/software/amazon/smithy/model/loader/error-recovery.smithy b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/error-recovery.smithy new file mode 100644 index 00000000000..5ac7655d45f --- /dev/null +++ b/smithy-model/src/test/resources/software/amazon/smithy/model/loader/error-recovery.smithy @@ -0,0 +1,13 @@ +$version: "2.0" + +namespace smithy.example + +string MyString + +structure MyFooIsBroken { +// The parser will keep trying to parse here, assuming integer is a key and needs to be followed by ":". +integer MyInteger + +// When the above fails, error recovery kicks in, looking for the next token at the start of the line. +@unknown +integer MyInteger2