Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/Parlot/Fluent/Between.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@

namespace Parlot.Fluent;

/// <summary>
/// This parser parses a value between two other parsers. It returns the value parsed by the middle parser,
/// making it easier to skip delimiters than writing <c>a.SkipAnd(b).AndSkip(c)</c>.
/// </summary>
/// <typeparam name="A">The type of the parser before the main parser.</typeparam>
/// <typeparam name="T">The type of the value parsed by the main parser.</typeparam>
/// <typeparam name="B">The type of the parser after the main parser.</typeparam>
public sealed class Between<A, T, B> : Parser<T>, ICompilable, ISeekable
{
private readonly Parser<T> _parser;
Expand Down
2 changes: 1 addition & 1 deletion src/Parlot/Fluent/Parsers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public static partial class Parsers
public static Parser<T> Select<T>(Func<ParseContext, Parser<T>> selector) => new Select<ParseContext, T>(selector);

/// <summary>
/// Builds a parser that can be defined later one. Use it when a parser need to be declared before its rule can be set.
/// Builds a parser that can be defined later on. Use it when a parser needs to be declared before its rule can be set.
/// </summary>
public static Deferred<T> Deferred<T>() => new();

Expand Down
55 changes: 26 additions & 29 deletions src/Parlot/Scanner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,22 +39,20 @@ public bool SkipWhiteSpaceOrNewLine()
return false;
}

SkipRemainingWhiteSpaceOrNewLine();
return true;
}

private void SkipRemainingWhiteSpaceOrNewLine()
{
var span = Cursor.Span;
var length = span.Length;

for (var i = 1; i < length; i++)
{
var c = span[i];
var i = 0;

if (!Character.IsWhiteSpaceOrNewLine(c))
{
Cursor.Advance(i);
return true;
}
}
while (++i < length && Character.IsWhiteSpaceOrNewLine(span[i])) ;

Cursor.Advance(span.Length);
return true;
Cursor.Advance(i);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
Expand All @@ -64,34 +62,30 @@ public bool SkipWhiteSpace()
{
return false;
}


// Splitting such that not all code is inlined
SkipRemainingWhiteSpace();
return true;
}

private void SkipRemainingWhiteSpace()
{
var span = Cursor.Span;
var length = span.Length;

for (var i = 1; i < length; i++)
{
var c = span[i];

if (!Character.IsWhiteSpace(c))
{
if (i > 0)
{
Cursor.AdvanceNoNewLines(i);
return true;
}
var i = 0;

return false;
}
}
while (++i < length && Character.IsWhiteSpace(span[i])) ;

Cursor.AdvanceNoNewLines(span.Length);
return true;
Cursor.AdvanceNoNewLines(i);
}

/// <summary>
/// Convenience overload: forwards both predicates to the overload that also
/// outputs a span, and discards that span.
/// </summary>
/// <param name="first">Predicate passed through unchanged as the first argument.</param>
/// <param name="other">Predicate passed through unchanged as the second argument.</param>
/// <returns>Whatever the forwarded call returns.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[Obsolete("Not optimized")]
public bool ReadFirstThenOthers(Func<char, bool> first, Func<char, bool> other)
{
    // The caller only wants the success flag; the out value is thrown away.
    return ReadFirstThenOthers(first, other, out _);
}

[Obsolete("Not optimized")]
public bool ReadFirstThenOthers(Func<char, bool> first, Func<char, bool> other, out ReadOnlySpan<char> result)
{
if (!first(Cursor.Current))
Expand All @@ -114,8 +108,10 @@ public bool ReadFirstThenOthers(Func<char, bool> first, Func<char, bool> other,
}

/// <summary>
/// Convenience overload: calls the overload that outputs the matched span
/// and discards that span.
/// </summary>
/// <returns>Whatever the forwarded call returns.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[Obsolete("Should not be part of the Scanner")]
public bool ReadIdentifier()
{
    // Only the success flag is of interest here; the out value is ignored.
    return ReadIdentifier(out _);
}

[Obsolete("Should not be part of the Scanner")]
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What should be used instead? A note on that would be useful here.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The parsers already expose Identifier methods.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's a weird thing to do. This should be provided by the shortcode library. To fix this, I would suggest building an actual shortcode out of the provided name and trying to parse it "successfully". My sarcastic mind is glad it exposed bad usages of this ;)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

public bool ReadIdentifier(out ReadOnlySpan<char> result)
{
// perf: using Character.IsIdentifierStart instead of x => Character.IsIdentifierStart(x) induces some allocations
Expand Down Expand Up @@ -357,11 +353,12 @@ public bool ReadChar(char c)
/// <summary>
/// Reads the specified character.
/// </summary>
[Obsolete("Prefer bool ReadChar(char)")]
public bool ReadChar(char c, out ReadOnlySpan<char> result)
{
if (!Cursor.Match(c))
{
result = [];
result = default;
return false;
}

Expand Down
156 changes: 29 additions & 127 deletions test/Parlot.Benchmarks/SkipWhiteSpaceBenchmarks.cs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ namespace Parlot.Benchmarks;
public class SkipWhiteSpaceBenchmarks
{
private string _source;
private Scanner _scanner;

[Params(0, 1, 2, 10)]
public int Length { get; set; }
Expand All @@ -53,32 +54,26 @@ public class SkipWhiteSpaceBenchmarks
public void Setup()
{
_source = new string(' ', Length) + "a";
_scanner = new Scanner(_source);
}

[Benchmark, BenchmarkCategory("DefaultImplementation")]
[Benchmark, BenchmarkCategory("SkipWhiteSpace")]
public bool SkipWhiteSpace_Default()
{
return new Scanner(_source).SkipWhiteSpace();
return _scanner.SkipWhiteSpace();
}

[Benchmark, BenchmarkCategory("DefaultImplementation")]
[Benchmark, BenchmarkCategory("SkipWhiteSpaceNewLines")]
public bool SkipWhiteSpaceOrNewLine_Default()
{
return new Scanner(_source).SkipWhiteSpaceOrNewLine();
return _scanner.SkipWhiteSpaceOrNewLine();
}

#if NET8_0_OR_GREATER
[Benchmark, BenchmarkCategory("Vectorized")]
[Benchmark, BenchmarkCategory("SkipWhiteSpace")]
public bool SkipWhiteSpace_Vectorized()
{
var scanner = new Scanner(_source);
var cursor = scanner.Cursor;

if (cursor.Eof)
{
return false;
}

var cursor = _scanner.Cursor;
var span = cursor.Span;

var index = span.IndexOfAnyExcept(SearchValuesHelper._whiteSpaces);
Expand All @@ -98,17 +93,10 @@ public bool SkipWhiteSpace_Vectorized()
}
}

[Benchmark, BenchmarkCategory("Vectorized")]
[Benchmark, BenchmarkCategory("SkipWhiteSpaceNewLines")]
public bool SkipWhiteSpaceOrNewLines_Vectorized()
{
var scanner = new Scanner(_source);
var cursor = scanner.Cursor;

if (cursor.Eof)
{
return false;
}

var cursor = _scanner.Cursor;
var span = cursor.Span;

var index = span.IndexOfAnyExcept(SearchValuesHelper._whiteSpaceOrNewLines);
Expand All @@ -127,120 +115,34 @@ public bool SkipWhiteSpaceOrNewLines_Vectorized()
}
}

[Benchmark, BenchmarkCategory("PeekSearchValue")]
public bool SkipWhiteSpace_PeekSearchValue()
[Benchmark, BenchmarkCategory("SkipWhiteSpace")]
public bool SkipWhiteSpace_Vectorized_Optimized()
{
var scanner = new Scanner(_source);
var cursor = scanner.Cursor;

var cursor = _scanner.Cursor;
var span = cursor.Span;
var length = span.Length;

for (var i = 0; i < length; i++)
{
var c = span[i];

if (!SearchValuesHelper._whiteSpaces.Contains(c))
{
if (i > 0)
{
cursor.AdvanceNoNewLines(i);
return true;
}

return false;
}
}

cursor.AdvanceNoNewLines(span.Length);
return true;
}

[Benchmark, BenchmarkCategory("PeekSearchValue")]
public bool SkipWhiteSpaceOrNewLines_PeekSearchValue()
{
var scanner = new Scanner(_source);
var cursor = scanner.Cursor;
// Check ASCII first
var index = span.IndexOfAnyExcept(SearchValuesHelper._whiteSpacesAscii);

var span = cursor.Span;
var length = span.Length;

for (var i = 0; i < length; i++)
// If we found a non-ASCII character, we need to check the full set
if (index > 0 && index < span.Length && span[index] > 127)
{
var c = span[i];

if (!SearchValuesHelper._whiteSpaceOrNewLines.Contains(c))
{
if (i > 0)
{
cursor.Advance(i);
return true;
}

return false;
}
index = span.Slice(index).IndexOfAnyExcept(SearchValuesHelper._whiteSpaces);
}

cursor.AdvanceNoNewLines(span.Length);
return true;
}

[Benchmark, BenchmarkCategory("PeekCharacter")]
public bool SkipWhiteSpace_PeekCharacter()
{
var scanner = new Scanner(_source);
var cursor = scanner.Cursor;

var span = cursor.Span;
var length = span.Length;

for (var i = 0; i < length; i++)
{
var c = span[i];

if (!Character.IsWhiteSpace(c))
{
if (i > 0)
{
cursor.AdvanceNoNewLines(i);
return true;
}

return false;
}
}

cursor.AdvanceNoNewLines(span.Length);
return true;
}

[Benchmark, BenchmarkCategory("PeekCharacter")]
public bool SkipWhiteSpaceOrNewLines_PeekCharacter()
{
var scanner = new Scanner(_source);
var cursor = scanner.Cursor;

var span = cursor.Span;
var length = span.Length;

for (var i = 0; i < length; i++)

// Only spaces ?
// Not tracking new lines since we know these are only spaces
switch (index)
{
var c = span[i];

if (!Character.IsWhiteSpaceOrNewLine(c))
{
if (i > 0)
{
cursor.Advance(i);
return true;
}

case 0:
return false;
}
case -1:
cursor.AdvanceNoNewLines(span.Length);
return true;
default:
cursor.AdvanceNoNewLines(index);
return true;
}

cursor.AdvanceNoNewLines(span.Length);
return true;
}
#endif
}
35 changes: 27 additions & 8 deletions test/Parlot.Tests/ScannerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,32 @@ public void SkipWhitespaceShouldSkipWhitespace()
Assert.False(new Scanner("a").SkipWhiteSpace());
}

// Every input here is either empty or begins with a letter or a line-break
// character; SkipWhiteSpace is expected to report false for all of them.
[Theory]
[InlineData("Loremipsum")]
[InlineData("a")]
[InlineData("")]
[InlineData("\r\n\t")]
[InlineData("\n\t")]
public void SkipWhiteSpaceWithNoSpacesReturnsFalse(string text)
{
    var scanner = new Scanner(text);

    var skipped = scanner.SkipWhiteSpace();

    Assert.False(skipped);
}

// Every input here begins with a space or a tab; SkipWhiteSpace is expected
// to report true for all of them.
[Theory]
[InlineData(" Loremipsum")]
[InlineData(" a")]
[InlineData("\t")]
public void SkipWhiteSpaceWithSpacesReturnsTrue(string text)
{
    var scanner = new Scanner(text);

    var skipped = scanner.SkipWhiteSpace();

    // The input doubles as the failure message so a failing case identifies itself.
    Assert.True(skipped, text);
}

[Fact]
public void ReadIdentifierShouldReadIdentifier()
{
#pragma warning disable CS0618 // Type or member is obsolete
Scanner s = new("a $abc 123");

Assert.True(s.ReadIdentifier(out var result));
Expand All @@ -178,21 +201,17 @@ public void ReadIdentifierShouldReadIdentifier()
s.SkipWhiteSpace();

Assert.False(s.ReadIdentifier());
#pragma warning restore CS0618 // Type or member is obsolete
}

[Fact]
public void ReadCharShouldReadSingleChar()
{
Scanner s = new("aaa");

Assert.True(s.ReadChar('a', out var result));
Assert.Equal("a", result);

Assert.True(s.ReadChar('a', out result));
Assert.Equal("a", result);

Assert.True(s.ReadChar('a', out result));
Assert.Equal("a", result);
Assert.True(s.ReadChar('a'));
Assert.True(s.ReadChar('a'));
Assert.True(s.ReadChar('a'));

Assert.False(s.ReadChar('a'));
}
Expand Down