diff --git a/docs/parsers.md b/docs/parsers.md index b0f72cc..a2c59a0 100644 --- a/docs/parsers.md +++ b/docs/parsers.md @@ -684,6 +684,29 @@ outer.Parse("a.b-c"); // Inner uses '.', outer uses '-' as whitespace > Note: The custom whitespace parser must return a `TextSpan`. Use `Capture()` to wrap parsers that don't return `TextSpan`. +### WithComments + +Based on `WithWhiteSpaceParser`, this helper makes it easier to define a custom comment syntax. + +Usage: + +```c# +var hello = Terms.Text("hello"); +var world = Terms.Text("world"); +var parser = hello.And(world) + .WithComments(builder => + { + builder.WithSingleLine("--"); + builder.WithSingleLine("#"); + builder.WithMultiLine("/*", "*/"); + }); + +parser.Parse("hello -- comment\n world"); +parser.Parse("hello -- comment\r\n world"); +parser.Parse("hello # comment\n world"); +parser.Parse("hello /* multiline\n comment\n */ world"); +``` + ### Deferred Creates a parser that can be referenced before it is actually defined. This is used when there is a cyclic dependency between parsers. @@ -999,6 +1022,8 @@ Returns any characters until the specified parser is matched. Parser AnyCharBefore(Parser parser, bool canBeEmpty = false, bool failOnEof = false, bool consumeDelimiter = false) ``` +For performance reasons, use `AnyCharBefore(a.Or(b))` instead of `AnyCharBefore(a).Or(AnyCharBefore(b))`. With the second form, when only `b` can be matched, the first `AnyCharBefore` has to look ahead through the whole source before it fails and the second one is tried. A single `AnyCharBefore` checks whichever of `a` or `b` comes first in the source, and then jumps to the next candidate. + ### Always Always returns successfully, with an optional return type or value. @@ -1025,3 +1050,7 @@ Like [Or](#Or), with an unlimited list of parsers. ```c# Parser OneOf(params Parser[] parsers) ``` + +## Comments + +Whitespace is parsed automatically when using the `Terms` helper methods. 
To use custom comments, see [WithComments](#WithComments). \ No newline at end of file diff --git a/src/Parlot/Fluent/Capture.cs b/src/Parlot/Fluent/Capture.cs index 5a12080..8348e0c 100644 --- a/src/Parlot/Fluent/Capture.cs +++ b/src/Parlot/Fluent/Capture.cs @@ -1,15 +1,30 @@ using Parlot.Compilation; +using Parlot.Rewriting; using System.Linq.Expressions; namespace Parlot.Fluent; -public sealed class Capture : Parser, ICompilable +public sealed class Capture : Parser, ICompilable, ISeekable { private readonly Parser _parser; + public bool CanSeek { get; } + + public char[] ExpectedChars { get; } = []; + + public bool SkipWhitespace { get; } + + public Capture(Parser parser) { _parser = parser; + + if (parser is ISeekable seekable && seekable.CanSeek) + { + CanSeek = true; + ExpectedChars = seekable.ExpectedChars; + SkipWhitespace = seekable.SkipWhitespace; + } } public override bool Parse(ParseContext context, ref ParseResult result) diff --git a/src/Parlot/Fluent/ParserExtensions.WhiteSpace.cs b/src/Parlot/Fluent/ParserExtensions.WhiteSpace.cs index 31ac1a9..d66fc45 100644 --- a/src/Parlot/Fluent/ParserExtensions.WhiteSpace.cs +++ b/src/Parlot/Fluent/ParserExtensions.WhiteSpace.cs @@ -1,10 +1,12 @@ +using System; +using System.Collections.Generic; + namespace Parlot.Fluent; public static partial class ParserExtensions { /// - /// Builds a parser that temporarily sets a custom whitespace parser for the current parser. - /// The whitespace parser will be reset after the parser completes. + /// Builds a parser that sets a custom whitespace parser for the current parser. /// /// The type of the parser result. /// The parser to execute with the custom whitespace parser. @@ -12,4 +14,47 @@ public static partial class ParserExtensions /// A parser that uses the custom whitespace parser. public static Parser WithWhiteSpaceParser(this Parser parser, Parser whiteSpaceParser) => new WithWhiteSpaceParser(parser, whiteSpaceParser); + + /// + /// Builds a parser that sets comments for the current parser. 
+ /// + /// The type of the parser result. + /// The parser to execute with the custom whitespace parser. + /// The action to configure the comments builder. + /// A parser that uses white spaces, new lines and comments. + public static Parser WithComments(this Parser parser, Action commentsBuilder) + { + var builder = new CommentsBuilder(Literals.WhiteSpace(includeNewLines: true)); + commentsBuilder(builder); + return new WithWhiteSpaceParser(parser, builder.Build()); + } +} + +public class CommentsBuilder +{ + private readonly List> _parsers = []; + + public CommentsBuilder(Parser whiteSpaceParser) + { + _parsers.Add(whiteSpaceParser); + } + + public Parser WithSingleLine(string singleLineStart) + { + var parser = Literals.Comments(singleLineStart); + _parsers.Add(parser); + return parser; + } + + public Parser WithMultiLine(string multiLineStart, string multiLineEnd) + { + var parser = Literals.Comments(multiLineStart, multiLineEnd); + _parsers.Add(parser); + return parser; + } + + public Parser Build() + { + return Capture(ZeroOrMany(OneOf(_parsers.ToArray()))); + } } diff --git a/src/Parlot/Fluent/Parsers.cs b/src/Parlot/Fluent/Parsers.cs index 3b229df..ac53be0 100644 --- a/src/Parlot/Fluent/Parsers.cs +++ b/src/Parlot/Fluent/Parsers.cs @@ -291,6 +291,21 @@ public Parser Identifier(Func? extraStart = null, FuncWhen the parser reaches the maximum number of chars it returns . Defaults to 0, i.e. no maximum size. public Parser NoneOf(ReadOnlySpan values, int minSize = 1, int maxSize = 0) => new ListOfChars(values, minSize, maxSize, negate: true); #endif + + /// + /// Builds a parser that matches single line comments. + /// + /// The text that starts the single line comment, e.g., "//", "--", "#" + /// + public Parser Comments(string singleLineStart) => Capture(Text(singleLineStart).And(AnyCharBefore(Text("\r\n").Or(Text("\n")), canBeEmpty: true, failOnEof: false, consumeDelimiter: false))); + + /// + /// Builds a parser that matches multi line comments. 
+ /// + /// The text that starts the multi line comment, e.g., "/*" + /// The text that ends the multi line comment, e.g., "*/" + /// + public Parser Comments(string multiLineStart, string multiLineEnd) => Capture(Text(multiLineStart).And(AnyCharBefore(Text(multiLineEnd), canBeEmpty: true, failOnEof: true, consumeDelimiter: true).ElseError($"End-of-file found, '{multiLineEnd}' expected"))); } public class TermBuilder @@ -434,6 +449,21 @@ public Parser Identifier(Func? extraStart = null, FuncThe set of chars not to match. /// The minimum number of required chars. Defaults to 1. /// When the parser reaches the maximum number of chars it returns . Defaults to 0, i.e. no maximum size. - public Parser NoneOf(ReadOnlySpan values, int minSize = 1, int maxSize = 0) => new ListOfChars(values, minSize, maxSize, negate: true); + public Parser NoneOf(ReadOnlySpan values, int minSize = 1, int maxSize = 0) => Parsers.SkipWhiteSpace(new ListOfChars(values, minSize, maxSize, negate: true)); #endif + + /// + /// Builds a parser that matches single line comments. + /// + /// The text that starts the single line comment, e.g., "//", "--", "#" + /// + public Parser Comments(string singleLineStart) => Literals.WhiteSpace(includeNewLines: true).Optional().SkipAnd(Literals.Comments(singleLineStart)); + + /// + /// Builds a parser that matches multi line comments. 
+ /// + /// The text that starts the multi line comment, e.g., "/*" + /// The text that ends the multi line comment, e.g., "*/" + /// + public Parser Comments(string multiLineStart, string multiLineEnd) => Literals.WhiteSpace(includeNewLines: true).Optional().SkipAnd(Literals.Comments(multiLineStart, multiLineEnd)); } diff --git a/src/Parlot/Fluent/TextBefore.cs b/src/Parlot/Fluent/TextBefore.cs index 0f3600b..be2b740 100644 --- a/src/Parlot/Fluent/TextBefore.cs +++ b/src/Parlot/Fluent/TextBefore.cs @@ -53,17 +53,17 @@ public override bool Parse(ParseContext context, ref ParseResult resul var parsed = new ParseResult(); - if (_canJumpToNextExpectedChar) + while (true) { + if (_canJumpToNextExpectedChar) + { #if NET8_0_OR_GREATER - JumpToNextExpectedChar(context, _expectedSearchValues!); + JumpToNextExpectedChar(context, _expectedSearchValues!); #else - JumpToNextExpectedChar(context, _expectedChars!); + JumpToNextExpectedChar(context, _expectedChars!); #endif - } + } - while (true) - { var previous = context.Scanner.Cursor.Position; if (context.Scanner.Cursor.Eof) @@ -122,18 +122,27 @@ private static void JumpToNextExpectedChar(ParseContext context, SearchValues= 0) + switch (index) { - context.Scanner.Cursor.Advance(index); + case >= 0: + context.Scanner.Cursor.Advance(index); + break; + case -1: + // No expected char found, move to the end + context.Scanner.Cursor.Advance(context.Scanner.Cursor.Span.Length); + break; } } #else private static void JumpToNextExpectedChar(ParseContext context, char[] expectedChars) { var indexOfAny = int.MaxValue; + var span = context.Scanner.Cursor.Span; + foreach (var c in expectedChars) { - var index = context.Scanner.Cursor.Span.IndexOf(c); + var index = span.IndexOf(c); + if (index >= 0) { indexOfAny = Math.Min(indexOfAny, index); @@ -144,6 +153,11 @@ private static void JumpToNextExpectedChar(ParseContext context, char[] expected { context.Scanner.Cursor.Advance(indexOfAny); } + else + { + // No expected char found, move to 
the end + context.Scanner.Cursor.Advance(context.Scanner.Cursor.Span.Length); + } } #endif diff --git a/src/Samples/Sql/SqlParser.cs b/src/Samples/Sql/SqlParser.cs index b202618..ced8c9b 100644 --- a/src/Samples/Sql/SqlParser.cs +++ b/src/Samples/Sql/SqlParser.cs @@ -388,10 +388,14 @@ static SqlParser() .Then(x => new StatementLine(x)); // Statement list - var statementList = OneOrMany(statementLine) - .Then(statements => new StatementList(statements)); + var statementList = SkipWhiteSpace(ZeroOrMany(statementLine) + .Then(statements => new StatementList(statements)).Eof()); - Statements = statementList; + Statements = statementList.WithComments(comments => + { + comments.WithSingleLine("--"); + comments.WithMultiLine("/*", "*/"); + }); } public static StatementList? Parse(string input) diff --git a/test/Parlot.Tests/CommentTests.cs b/test/Parlot.Tests/CommentTests.cs new file mode 100644 index 0000000..c1e5106 --- /dev/null +++ b/test/Parlot.Tests/CommentTests.cs @@ -0,0 +1,111 @@ +using Parlot.Fluent; +using Xunit; + +using static Parlot.Fluent.Parsers; + +namespace Parlot.Tests; + +public class CommentTests +{ + [Theory] + [InlineData("-- single line comment", "-- single line comment")] + [InlineData("-- ", "-- ")] + [InlineData("--", "--")] + [InlineData("--\n", "--")] + [InlineData("--\r\n", "--")] + [InlineData("-- some comment\n text here", "-- some comment")] + public void ShouldReadSingleLineComments(string text, string expected) + { + var comments = Literals.Comments("--"); + Assert.Equal(expected, comments.Parse(text).ToString()); + } + + [Theory] + [InlineData("hello-- single line comment\n world")] + [InlineData("hello-- \n world")] + [InlineData("hello--\n world")] + [InlineData("hello --\n world")] + public void ShouldSkipSingleLineComments(string text) + { + + var comments = Terms.Text("hello").And(Terms.Text("world")).WithWhiteSpaceParser(Capture(ZeroOrMany(Literals.WhiteSpace(includeNewLines: true).Or(Literals.Comments("--"))))); + 
Assert.True(comments.TryParse(text, out _)); + } + + [Theory] + [InlineData("hello -- single line comment")] + [InlineData("hello --")] + [InlineData("hello--")] + public void ShouldReadSingleLineCommentsAfterText(string text) + { + var comments = Terms.Text("hello").And(Terms.Comments("--")); + Assert.True(comments.TryParse(text, out _)); + } + + [Theory] + [InlineData("/* multi line comment */")] + [InlineData("/* multi \nline comment */")] + [InlineData("/**/")] + [InlineData("/*\n*/")] + [InlineData("/* */")] + public void ShouldReadMultiLineComments(string text) + { + var comments = Literals.Comments("/*", "*/"); + Assert.Equal(text, comments.Parse(text).ToString()); + } + + [Theory] + [InlineData("hello /* multi line comment */world")] + [InlineData("hello /**/world")] + [InlineData("hello/* */ world")] + [InlineData("hello /* multi line \n comment */ world")] + [InlineData("hello /* multi line \n comment */ world\n")] + [InlineData("hello /* multi \nline \n comment */ world")] + [InlineData("hello /* multi line \n\n comment */ world")] + [InlineData("hello /*\n*/ world")] + [InlineData("hello/* */ world\n")] + public void ShouldReadMultiLineCommentsAfterText(string text) + { + var comments = Terms.Text("hello").And(Terms.Comments("/*", "*/")).And(Terms.Text("world")); + Assert.True(comments.TryParse(text, out _)); + } + + [Theory] + [InlineData("hello /* multi line comment ")] + [InlineData("hello /* asd")] + [InlineData("hello/* ")] + public void ShouldFailUnterminatedMultiLineComments(string text) + { + var comments = Terms.Text("hello").And(Terms.Comments("/*", "*/")); + Assert.False(comments.TryParse(text, out _)); + } + + [Theory] + [InlineData("hello-- single line comment\n world")] + [InlineData("hello-- \n world")] + [InlineData("hello--\n world")] + [InlineData("hello --\n world")] + [InlineData("hello --\r\n world")] + [InlineData("hello -- \r\n world")] + [InlineData("hello world # comment")] + [InlineData("hello world -- comment")] + 
[InlineData("hello world -- # comment")] + [InlineData("hello#comment\nworld ")] + [InlineData("hello\n#\n#\n--\r\nworld")] + [InlineData("hello/* comment */ /*comment2*/ world")] + [InlineData("hello/*--\n*/ world")] + [InlineData("hello /* /* */world")] + [InlineData("hello world")] + public void ShouldParseAllComments(string text) + { + var comments = Terms.Text("hello").And(Terms.Text("world")) + .WithComments(builder => + { + builder.WithSingleLine("--"); + builder.WithSingleLine("#"); + builder.WithMultiLine("/*", "*/"); + }); + + Assert.True(comments.TryParse(text, out _)); + } +} diff --git a/test/Parlot.Tests/FluentTests.cs b/test/Parlot.Tests/FluentTests.cs index acc98c1..cb31d66 100644 --- a/test/Parlot.Tests/FluentTests.cs +++ b/test/Parlot.Tests/FluentTests.cs @@ -5,6 +5,7 @@ using System.Globalization; using System.Linq; using System.Numerics; +using System.Runtime.InteropServices; using Xunit; using static Parlot.Fluent.Parsers; diff --git a/test/Parlot.Tests/Sql/SqlParserTests.cs b/test/Parlot.Tests/Sql/SqlParserTests.cs index 498fedb..819cd0b 100644 --- a/test/Parlot.Tests/Sql/SqlParserTests.cs +++ b/test/Parlot.Tests/Sql/SqlParserTests.cs @@ -5,6 +5,27 @@ namespace Parlot.Tests.Sql; public class SqlParserTests { + [Theory] + [InlineData("SELECT * -- comment \n FROM users")] + [InlineData("SELECT id, name /* multiline\n comment\n */ FROM users")] + [InlineData("/* some documentation */ SELECT id, name FROM users WHERE id = 1")] + public void ShouldParseComments(string sql) + { + var result = SqlParser.Parse(sql); + Assert.NotNull(result); + Assert.Single(result.Statements); + } + + [Theory] + [InlineData("-- comment SELECT * FROM users")] + [InlineData("/* some documentation SELECT id, name FROM users WHERE id = 1 */")] + public void ShouldParseCommentsWithNoResults(string sql) + { + var result = SqlParser.Parse(sql); + Assert.NotNull(result); + Assert.Empty(result.Statements); + } + [Theory] [InlineData("SELECT * FROM users")] [InlineData("SELECT 
id, name FROM users")]