Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions docs/parsers.md
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,29 @@ outer.Parse("a.b-c"); // Inner uses '.', outer uses '-' as whitespace

> Note: The custom whitespace parser must return a `TextSpan`. Use `Capture()` to wrap parsers that don't return `TextSpan`.

### WithComments

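Built on top of `WithWhiteSpaceParser`, this helper makes it easier to define a custom comment syntax. White space and new lines are still skipped, together with every comment style registered on the builder.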

Usage:

```c#
var hello = Terms.Text("hello");
var world = Terms.Text("world");
var parser = hello.And(world)
.WithComments(builder =>
{
builder.WithSingleLine("--");
builder.WithSingleLine("#");
builder.WithMultiLine("/*", "*/");
});

parser.Parse("hello -- comment\n world");
parser.Parse("hello -- comment\r\n world");
parser.Parse("hello # comment\n world");
parser.Parse("hello /* multiline\n comment\n */ world");
```

### Deferred

Creates a parser that can be referenced before it is actually defined. This is used when there is a cyclic dependency between parsers.
Expand Down Expand Up @@ -999,6 +1022,8 @@ Returns any characters until the specified parser is matched.
Parser<TextSpan> AnyCharBefore<T>(Parser<T> parser, bool canBeEmpty = false, bool failOnEof = false, bool consumeDelimiter = false)
```

For performance reasons, prefer `AnyCharBefore(a.Or(b))` over `AnyCharBefore(a).Or(AnyCharBefore(b))`. With the second form, if only `b` can be matched, the first `AnyCharBefore` has to look ahead through the whole source before it fails and the second one gets a chance to run. A single `AnyCharBefore` checks whichever delimiter comes first in the source, and then jumps to the next option.

### Always

Always returns successfully, with an optional return type or value.
Expand All @@ -1025,3 +1050,7 @@ Like [Or](#Or), with an unlimited list of parsers.
```c#
Parser<T> OneOf<T>(params Parser<T>[] parsers)
```

## Comments

White space is skipped automatically when using the `Terms` helper methods. To skip custom comments as well, use [WithComments](#WithComments).
17 changes: 16 additions & 1 deletion src/Parlot/Fluent/Capture.cs
Original file line number Diff line number Diff line change
@@ -1,15 +1,30 @@
using Parlot.Compilation;
using Parlot.Rewriting;
using System.Linq.Expressions;

namespace Parlot.Fluent;

public sealed class Capture<T> : Parser<TextSpan>, ICompilable
public sealed class Capture<T> : Parser<TextSpan>, ICompilable, ISeekable
{
private readonly Parser<T> _parser;

public bool CanSeek { get; }

public char[] ExpectedChars { get; } = [];

public bool SkipWhitespace { get; }


/// <summary>
/// Initializes a new <see cref="Capture{T}"/> that wraps <paramref name="parser"/>.
/// </summary>
/// <remarks>
/// When the wrapped parser implements <see cref="ISeekable"/> and reports that it can seek,
/// its seek metadata is mirrored on this instance; otherwise the defaults are kept.
/// </remarks>
/// <param name="parser">The parser being wrapped.</param>
public Capture(Parser<T> parser)
{
    _parser = parser;

    // Nothing to mirror unless the wrapped parser advertises that it can seek.
    if (parser is not ISeekable { CanSeek: true } inner)
    {
        return;
    }

    CanSeek = true;
    ExpectedChars = inner.ExpectedChars;
    SkipWhitespace = inner.SkipWhitespace;
}

public override bool Parse(ParseContext context, ref ParseResult<TextSpan> result)
Expand Down
49 changes: 47 additions & 2 deletions src/Parlot/Fluent/ParserExtensions.WhiteSpace.cs
Original file line number Diff line number Diff line change
@@ -1,15 +1,60 @@
using System;
using System.Collections.Generic;

namespace Parlot.Fluent;

public static partial class ParserExtensions
{
/// <summary>
/// Builds a parser that sets a custom whitespace parser for the current parser.
/// </summary>
/// <typeparam name="T">The type of the parser result.</typeparam>
/// <param name="parser">The parser to execute with the custom whitespace parser.</param>
/// <param name="whiteSpaceParser">The custom whitespace parser to use.</param>
/// <returns>A parser that uses the custom whitespace parser.</returns>
public static Parser<T> WithWhiteSpaceParser<T>(this Parser<T> parser, Parser<TextSpan> whiteSpaceParser)
=> new WithWhiteSpaceParser<T>(parser, whiteSpaceParser);

/// <summary>
/// Builds a parser that treats white spaces, new lines and the configured comments as whitespace
/// for the current parser.
/// </summary>
/// <typeparam name="T">The type of the parser result.</typeparam>
/// <param name="parser">The parser to execute with the comment-aware whitespace parser.</param>
/// <param name="commentsBuilder">The action that registers the comment styles on the <see cref="CommentsBuilder"/>.</param>
/// <returns>A parser that uses white spaces, new lines and comments.</returns>
/// <exception cref="ArgumentNullException"><paramref name="commentsBuilder"/> is <see langword="null"/>.</exception>
public static Parser<T> WithComments<T>(this Parser<T> parser, Action<CommentsBuilder> commentsBuilder)
{
    // Fail with a descriptive exception instead of a NullReferenceException when the delegate is invoked.
    if (commentsBuilder is null)
    {
        throw new ArgumentNullException(nameof(commentsBuilder));
    }

    // The builder always starts with plain white space (new lines included); the callback adds the comment styles.
    var builder = new CommentsBuilder(Literals.WhiteSpace(includeNewLines: true));
    commentsBuilder(builder);

    return new WithWhiteSpaceParser<T>(parser, builder.Build());
}
}

/// <summary>
/// Collects a white space parser and any number of comment parsers, and combines them
/// into a single whitespace parser with <see cref="Build"/>.
/// </summary>
public class CommentsBuilder
{
    private readonly List<Parser<TextSpan>> _parsers = [];

    /// <summary>
    /// Initializes a new builder whose first alternative is <paramref name="whiteSpaceParser"/>.
    /// </summary>
    /// <param name="whiteSpaceParser">The parser that matches plain white space.</param>
    /// <exception cref="ArgumentNullException"><paramref name="whiteSpaceParser"/> is <see langword="null"/>.</exception>
    public CommentsBuilder(Parser<TextSpan> whiteSpaceParser)
    {
        if (whiteSpaceParser is null)
        {
            throw new ArgumentNullException(nameof(whiteSpaceParser));
        }

        _parsers.Add(whiteSpaceParser);
    }

    /// <summary>
    /// Registers a single line comment style.
    /// </summary>
    /// <param name="singleLineStart">The text that starts the single line comment, e.g., <c>"//"</c>, <c>"--"</c>, <c>"#"</c>.</param>
    /// <returns>The parser that was registered for this comment style.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="singleLineStart"/> is <see langword="null"/>.</exception>
    /// <exception cref="ArgumentException"><paramref name="singleLineStart"/> is empty.</exception>
    public Parser<TextSpan> WithSingleLine(string singleLineStart)
    {
        ThrowIfNullOrEmpty(singleLineStart, nameof(singleLineStart));

        var parser = Literals.Comments(singleLineStart);
        _parsers.Add(parser);
        return parser;
    }

    /// <summary>
    /// Registers a multi line comment style.
    /// </summary>
    /// <param name="multiLineStart">The text that starts the multi line comment, e.g., <c>"/*"</c>.</param>
    /// <param name="multiLineEnd">The text that ends the multi line comment, e.g., <c>"*/"</c>.</param>
    /// <returns>The parser that was registered for this comment style.</returns>
    /// <exception cref="ArgumentNullException">A delimiter is <see langword="null"/>.</exception>
    /// <exception cref="ArgumentException">A delimiter is empty.</exception>
    public Parser<TextSpan> WithMultiLine(string multiLineStart, string multiLineEnd)
    {
        ThrowIfNullOrEmpty(multiLineStart, nameof(multiLineStart));
        ThrowIfNullOrEmpty(multiLineEnd, nameof(multiLineEnd));

        var parser = Literals.Comments(multiLineStart, multiLineEnd);
        _parsers.Add(parser);
        return parser;
    }

    /// <summary>
    /// Combines every registered parser into one parser that matches zero or more of them
    /// and captures the matched text as a single <see cref="TextSpan"/>.
    /// </summary>
    /// <returns>The combined whitespace parser.</returns>
    public Parser<TextSpan> Build()
    {
        return Capture(ZeroOrMany(OneOf(_parsers.ToArray())));
    }

    // Rejects comment delimiters that are null or empty before a parser is built from them.
    private static void ThrowIfNullOrEmpty(string value, string paramName)
    {
        if (value is null)
        {
            throw new ArgumentNullException(paramName);
        }

        if (value.Length == 0)
        {
            throw new ArgumentException("The comment delimiter cannot be empty.", paramName);
        }
    }
}
32 changes: 31 additions & 1 deletion src/Parlot/Fluent/Parsers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,21 @@ public Parser<TextSpan> Identifier(Func<char, bool>? extraStart = null, Func<cha
/// <param name="maxSize">When the parser reaches the maximum number of chars it returns <see langword="True"/>. Defaults to 0, i.e. no maximum size.</param>
public Parser<TextSpan> NoneOf(ReadOnlySpan<char> values, int minSize = 1, int maxSize = 0) => new ListOfChars(values, minSize, maxSize, negate: true);
#endif

/// <summary>
/// Builds a parser that matches a single line comment.
/// </summary>
/// <remarks>
/// The comment starts at <paramref name="singleLineStart"/> and runs up to, but not including,
/// the next <c>"\r\n"</c> or <c>"\n"</c>. Reaching the end of the input is not treated as a failure.
/// </remarks>
/// <param name="singleLineStart">The text that starts the single line comment, e.g., <c>"//"</c>, <c>"--"</c>, <c>"#"</c>.</param>
/// <returns>A parser that captures the comment, start marker included, as a <see cref="TextSpan"/>.</returns>
public Parser<TextSpan> Comments(string singleLineStart)
{
    var opening = Text(singleLineStart);
    var endOfLine = Text("\r\n").Or(Text("\n"));

    // The line break is left in the input: the delimiter is looked up but not consumed.
    var body = AnyCharBefore(endOfLine, canBeEmpty: true, failOnEof: false, consumeDelimiter: false);

    return Capture(opening.And(body));
}

/// <summary>
/// Builds a parser that matches a multi line comment.
/// </summary>
/// <remarks>
/// The comment starts at <paramref name="multiLineStart"/> and ends with <paramref name="multiLineEnd"/>,
/// which is consumed as part of the comment. When the end marker is missing, the failure is turned into
/// an error stating that <paramref name="multiLineEnd"/> was expected.
/// </remarks>
/// <param name="multiLineStart">The text that starts the multi line comment, e.g., <c>"/*"</c>.</param>
/// <param name="multiLineEnd">The text that ends the multi line comment, e.g., <c>"*/"</c>.</param>
/// <returns>A parser that captures the whole comment, both markers included, as a <see cref="TextSpan"/>.</returns>
public Parser<TextSpan> Comments(string multiLineStart, string multiLineEnd)
{
    var opening = Text(multiLineStart);

    // Unlike single line comments, running out of input before the end marker is a failure.
    var body = AnyCharBefore(Text(multiLineEnd), canBeEmpty: true, failOnEof: true, consumeDelimiter: true);
    var terminatedBody = body.ElseError($"End-of-file found, '{multiLineEnd}' expected");

    return Capture(opening.And(terminatedBody));
}
}

public class TermBuilder
Expand Down Expand Up @@ -434,6 +449,21 @@ public Parser<TextSpan> Identifier(Func<char, bool>? extraStart = null, Func<cha
/// <param name="values">The set of chars not to match.</param>
/// <param name="minSize">The minimum number of required chars. Defaults to 1.</param>
/// <param name="maxSize">When the parser reaches the maximum number of chars it returns <see langword="True"/>. Defaults to 0, i.e. no maximum size.</param>
public Parser<TextSpan> NoneOf(ReadOnlySpan<char> values, int minSize = 1, int maxSize = 0) => new ListOfChars(values, minSize, maxSize, negate: true);
public Parser<TextSpan> NoneOf(ReadOnlySpan<char> values, int minSize = 1, int maxSize = 0) => Parsers.SkipWhiteSpace(new ListOfChars(values, minSize, maxSize, negate: true));
#endif

/// <summary>
/// Builds a parser that matches a single line comment, skipping any white space that precedes it.
/// </summary>
/// <param name="singleLineStart">The text that starts the single line comment, e.g., <c>"//"</c>, <c>"--"</c>, <c>"#"</c>.</param>
/// <returns>A parser that returns the comment matched by <c>Literals.Comments</c>, without the leading white space.</returns>
public Parser<TextSpan> Comments(string singleLineStart)
{
    // Leading white space (new lines included) is optional and its result is discarded.
    var leadingWhiteSpace = Literals.WhiteSpace(includeNewLines: true).Optional();

    return leadingWhiteSpace.SkipAnd(Literals.Comments(singleLineStart));
}

/// <summary>
/// Builds a parser that matches a multi line comment, skipping any white space that precedes it.
/// </summary>
/// <param name="multiLineStart">The text that starts the multi line comment, e.g., <c>"/*"</c>.</param>
/// <param name="multiLineEnd">The text that ends the multi line comment, e.g., <c>"*/"</c>.</param>
/// <returns>A parser that returns the comment matched by <c>Literals.Comments</c>, without the leading white space.</returns>
public Parser<TextSpan> Comments(string multiLineStart, string multiLineEnd)
{
    // Leading white space (new lines included) is optional and its result is discarded.
    var leadingWhiteSpace = Literals.WhiteSpace(includeNewLines: true).Optional();

    return leadingWhiteSpace.SkipAnd(Literals.Comments(multiLineStart, multiLineEnd));
}
}
32 changes: 23 additions & 9 deletions src/Parlot/Fluent/TextBefore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,17 @@ public override bool Parse(ParseContext context, ref ParseResult<TextSpan> resul

var parsed = new ParseResult<T>();

if (_canJumpToNextExpectedChar)
while (true)
{
if (_canJumpToNextExpectedChar)
{
#if NET8_0_OR_GREATER
JumpToNextExpectedChar(context, _expectedSearchValues!);
JumpToNextExpectedChar(context, _expectedSearchValues!);
#else
JumpToNextExpectedChar(context, _expectedChars!);
JumpToNextExpectedChar(context, _expectedChars!);
#endif
}
}

while (true)
{
var previous = context.Scanner.Cursor.Position;

if (context.Scanner.Cursor.Eof)
Expand Down Expand Up @@ -122,18 +122,27 @@ private static void JumpToNextExpectedChar(ParseContext context, SearchValues<ch
{
var index = context.Scanner.Cursor.Span.IndexOfAny(expectedChars);

if (index >= 0)
switch (index)
{
context.Scanner.Cursor.Advance(index);
case >= 0:
context.Scanner.Cursor.Advance(index);
break;
case -1:
// No expected char found, move to the end
context.Scanner.Cursor.Advance(context.Scanner.Cursor.Span.Length);
break;
}
}
#else
private static void JumpToNextExpectedChar(ParseContext context, char[] expectedChars)
{
var indexOfAny = int.MaxValue;
var span = context.Scanner.Cursor.Span;

foreach (var c in expectedChars)
{
var index = context.Scanner.Cursor.Span.IndexOf(c);
var index = span.IndexOf(c);

if (index >= 0)
{
indexOfAny = Math.Min(indexOfAny, index);
Expand All @@ -144,6 +153,11 @@ private static void JumpToNextExpectedChar(ParseContext context, char[] expected
{
context.Scanner.Cursor.Advance(indexOfAny);
}
else
{
// No expected char found, move to the end
context.Scanner.Cursor.Advance(context.Scanner.Cursor.Span.Length);
}
}
#endif

Expand Down
10 changes: 7 additions & 3 deletions src/Samples/Sql/SqlParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,14 @@ static SqlParser()
.Then(x => new StatementLine(x));

// Statement list
var statementList = OneOrMany(statementLine)
.Then(statements => new StatementList(statements));
var statementList = SkipWhiteSpace(ZeroOrMany(statementLine)
.Then(statements => new StatementList(statements)).Eof());

Statements = statementList;
Statements = statementList.WithComments(comments =>
{
comments.WithSingleLine("--");
comments.WithMultiLine("/*", "*/");
});
}

public static StatementList? Parse(string input)
Expand Down
111 changes: 111 additions & 0 deletions test/Parlot.Tests/CommentTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
using Parlot.Fluent;
using Xunit;

using static Parlot.Fluent.Parsers;

namespace Parlot.Tests;

/// <summary>
/// Tests for the single line and multi line comment parsers, and for the
/// <c>WithComments</c> helper that treats comments as whitespace.
/// </summary>
public class CommentTests
{
    [Theory]
    [InlineData("-- single line comment", "-- single line comment")]
    [InlineData("-- ", "-- ")]
    [InlineData("--", "--")]
    [InlineData("--\n", "--")]
    [InlineData("--\r\n", "--")]
    [InlineData("-- some comment\n text here", "-- some comment")]
    public void ShouldReadSingleLineComments(string text, string expected)
    {
        var parser = Literals.Comments("--");

        var actual = parser.Parse(text).ToString();

        // The captured comment stops before the line break.
        Assert.Equal(expected, actual);
    }

    [Theory]
    [InlineData("hello-- single line comment\n world")]
    [InlineData("hello-- \n world")]
    [InlineData("hello--\n world")]
    [InlineData("hello --\n world")]
    public void ShouldSkipSingleLineComments(string text)
    {
        // Hand-built whitespace parser: any mix of white space and "--" comments.
        var whiteSpaceOrComment = Literals.WhiteSpace(includeNewLines: true).Or(Literals.Comments("--"));
        var skipped = Capture(ZeroOrMany(whiteSpaceOrComment));
        var parser = Terms.Text("hello").And(Terms.Text("world")).WithWhiteSpaceParser(skipped);

        var success = parser.TryParse(text, out _);

        Assert.True(success);
    }

    [Theory]
    [InlineData("hello -- single line comment")]
    [InlineData("hello --")]
    [InlineData("hello--")]
    public void ShouldReadSingleLineCommentsAfterText(string text)
    {
        var parser = Terms.Text("hello").And(Terms.Comments("--"));

        var success = parser.TryParse(text, out _);

        Assert.True(success);
    }

    [Theory]
    [InlineData("/* multi line comment */")]
    [InlineData("/* multi \nline comment */")]
    [InlineData("/**/")]
    [InlineData("/*\n*/")]
    [InlineData("/* */")]
    public void ShouldReadMultiLineComments(string text)
    {
        var parser = Literals.Comments("/*", "*/");

        var actual = parser.Parse(text).ToString();

        // The whole input, both markers included, is the comment.
        Assert.Equal(text, actual);
    }

    [Theory]
    [InlineData("hello /* multi line comment */world")]
    [InlineData("hello /**/world")]
    [InlineData("hello/* */ world")]
    [InlineData("hello /* multi line \n comment */ world")]
    [InlineData("hello /* multi line \n comment */ world\n")]
    [InlineData("hello /* multi \nline \n comment */ world")]
    [InlineData("hello /* multi line \n\n comment */ world")]
    [InlineData("hello /*\n*/ world")]
    [InlineData("hello/* */ world\n")]
    public void ShouldReadMultiLineCommentsAfterText(string text)
    {
        var parser = Terms.Text("hello").And(Terms.Comments("/*", "*/")).And(Terms.Text("world"));

        var success = parser.TryParse(text, out _);

        Assert.True(success);
    }

    [Theory]
    [InlineData("hello /* multi line comment ")]
    [InlineData("hello /* asd")]
    [InlineData("hello/* ")]
    public void ShouldFailUnterminatedMultiLineComments(string text)
    {
        var parser = Terms.Text("hello").And(Terms.Comments("/*", "*/"));

        var success = parser.TryParse(text, out _);

        // The closing marker never appears, so parsing must not succeed.
        Assert.False(success);
    }

    [Theory]
    [InlineData("hello-- single line comment\n world")]
    [InlineData("hello-- \n world")]
    [InlineData("hello--\n world")]
    [InlineData("hello --\n world")]
    [InlineData("hello --\r\n world")]
    [InlineData("hello -- \r\n world")]
    [InlineData("hello world # comment")]
    [InlineData("hello world -- comment")]
    [InlineData("hello world -- # comment")]
    [InlineData("hello#comment\nworld ")]
    [InlineData("hello\n#\n#\n--\r\nworld")]
    [InlineData("hello/* comment */ /*comment2*/ world")]
    [InlineData("hello/*--\n*/ world")]
    [InlineData("hello /* /* */world")]
    [InlineData("hello world")]
    public void ShouldParseAllComments(string text)
    {
        var helloWorld = Terms.Text("hello").And(Terms.Text("world"));
        var parser = helloWorld.WithComments(builder =>
        {
            builder.WithSingleLine("--");
            builder.WithSingleLine("#");
            builder.WithMultiLine("/*", "*/");
        });

        var success = parser.TryParse(text, out _);

        Assert.True(success);
    }
}
1 change: 1 addition & 0 deletions test/Parlot.Tests/FluentTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Globalization;
using System.Linq;
using System.Numerics;
using System.Runtime.InteropServices;
using Xunit;

using static Parlot.Fluent.Parsers;
Expand Down
Loading