From 3f1a0fc1a79128e1d9ec499db004d38ee29fb62f Mon Sep 17 00:00:00 2001 From: Sebastien Ros Date: Mon, 24 Nov 2025 09:27:42 -0800 Subject: [PATCH] Improve performance --- src/Parlot/Fluent/HybridList.cs | 115 +++++++ src/Parlot/Fluent/OneOrMany.cs | 2 +- .../Fluent/ParserExtensions.WhiteSpace.cs | 46 ++- src/Parlot/Fluent/Separated.cs | 4 +- src/Parlot/Fluent/When.cs | 21 +- src/Parlot/Fluent/ZeroOrMany.cs | 8 +- src/Samples/Sql/SqlAst.cs | 19 +- src/Samples/Sql/SqlParser.cs | 31 +- test/Parlot.Benchmarks/SqlParserBenchmarks.cs | 54 ++++ test/Parlot.Tests/CommentTests.cs | 1 + test/Parlot.Tests/HybridListTests.cs | 299 ++++++++++++++++++ test/Parlot.Tests/Parlot.Tests.csproj | 4 + 12 files changed, 566 insertions(+), 38 deletions(-) create mode 100644 src/Parlot/Fluent/HybridList.cs create mode 100644 test/Parlot.Benchmarks/SqlParserBenchmarks.cs create mode 100644 test/Parlot.Tests/HybridListTests.cs diff --git a/src/Parlot/Fluent/HybridList.cs b/src/Parlot/Fluent/HybridList.cs new file mode 100644 index 00000000..f793aaa9 --- /dev/null +++ b/src/Parlot/Fluent/HybridList.cs @@ -0,0 +1,115 @@ +using System; +using System.Collections; +using System.Collections.Generic; + +namespace Parlot.Fluent; + +/// +/// An internal implementation of IReadOnlyList<T> that stores up to 4 items inline +/// before switching to a List<T> for growth. +/// This provides efficient memory usage for small result sets while maintaining +/// flexibility for larger lists. +/// +#nullable enable +internal sealed class HybridList : IReadOnlyList +{ + private T? _item1; + private T? _item2; + private T? _item3; + private T? _item4; + private List? _list; + private int _count; + + public int Count => _count; + + public T this[int index] + { + get + { + if (index < 0 || index >= _count) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + if (_list is not null) + { + return _list[index]; + } + + return index switch + { + 0 => _item1!, + 1 => _item2!, + 2 => _item3!, + 3 => _item4!, + _ => throw new ArgumentOutOfRangeException(nameof(index)) + }; + } + } + + public void Add(T item) + { + if (_list is not null) + { + _list.Add(item); + _count++; + } + else + { + switch (_count) + { + case 0: + _item1 = item; + _count++; + break; + case 1: + _item2 = item; + _count++; + break; + case 2: + _item3 = item; + _count++; + break; + case 3: + _item4 = item; + _count++; + break; + case 4: + // Transition to List + _list = new List(8) { _item1!, _item2!, _item3!, _item4!, item }; + _item1 = default; + _item2 = default; + _item3 = default; + _item4 = default; + _count++; + break; + default: + throw new InvalidOperationException("Unexpected count value"); + } + } + } + + public IEnumerator GetEnumerator() + { + if (_list is not null) + { + return _list.GetEnumerator(); + } + + return GetEnumeratorInternal(); + } + + private IEnumerator GetEnumeratorInternal() + { + if (_count >= 1) + yield return _item1!; + if (_count >= 2) + yield return _item2!; + if (_count >= 3) + yield return _item3!; + if (_count >= 4) + yield return _item4!; + } + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); +} diff --git a/src/Parlot/Fluent/OneOrMany.cs b/src/Parlot/Fluent/OneOrMany.cs index ffd5d001..cc22e757 100644 --- a/src/Parlot/Fluent/OneOrMany.cs +++ b/src/Parlot/Fluent/OneOrMany.cs @@ -42,7 +42,7 @@ public override bool Parse(ParseContext context, ref ParseResult(); + var results = new HybridList(); int end; diff --git a/src/Parlot/Fluent/ParserExtensions.WhiteSpace.cs b/src/Parlot/Fluent/ParserExtensions.WhiteSpace.cs index d66fc45a..d29bba9d 100644 --- a/src/Parlot/Fluent/ParserExtensions.WhiteSpace.cs +++ b/src/Parlot/Fluent/ParserExtensions.WhiteSpace.cs @@ -22,9 +22,20 @@ public static Parser WithWhiteSpaceParser(this Parser parser, ParserThe parser to execute with the custom whitespace parser. /// The action to configure the comments builder. /// A parser that uses white spaces, new lines and comments. + /// + /// Here is an example of usage: + /// + /// var parserWithComments = myParser.WithComments(comments => + /// { + /// comments.WithWhiteSpaceOrNewLine(); + /// comments.WithSingleLine("//"); + /// comments.WithMultiLine("/*", "*/"); + /// }); + /// + /// public static Parser WithComments(this Parser parser, Action commentsBuilder) { - var builder = new CommentsBuilder(Literals.WhiteSpace(includeNewLines: true)); + var builder = new CommentsBuilder(); commentsBuilder(builder); return new WithWhiteSpaceParser(parser, builder.Build()); } @@ -34,23 +45,48 @@ public class CommentsBuilder { private readonly List> _parsers = []; + [Obsolete("Use CommentsBuilder().WithParser(parser) instead.")] public CommentsBuilder(Parser whiteSpaceParser) { _parsers.Add(whiteSpaceParser); } - public Parser WithSingleLine(string singleLineStart) + public CommentsBuilder() + { + } + + public CommentsBuilder WithWhiteSpace() + { + var parser = Literals.WhiteSpace(); + _parsers.Add(parser); + return this; + } + + public CommentsBuilder WithWhiteSpaceOrNewLine() + { + var parser = Literals.WhiteSpace(includeNewLines: true); + _parsers.Add(parser); + return this; + } + + public CommentsBuilder WithParser(Parser parser) + { + _parsers.Add(parser); + return this; + } + + public CommentsBuilder WithSingleLine(string singleLineStart) { var parser = Literals.Comments(singleLineStart); _parsers.Add(parser); - return parser; + return this; } - public Parser WithMultiLine(string multiLineStart, string multiLineEnd) + public CommentsBuilder WithMultiLine(string multiLineStart, string multiLineEnd) { var parser = Literals.Comments(multiLineStart, multiLineEnd); _parsers.Add(parser); - return parser; + return this; } public Parser Build() diff --git a/src/Parlot/Fluent/Separated.cs b/src/Parlot/Fluent/Separated.cs index c463ad3f..5494708e 100644 --- a/src/Parlot/Fluent/Separated.cs +++ b/src/Parlot/Fluent/Separated.cs @@ -37,7 +37,7 @@ public override bool Parse(ParseContext context, ref ParseResult? results = null; + HybridList? results = null; var start = 0; var end = context.Scanner.Cursor.Position; @@ -84,7 +84,7 @@ public override bool Parse(ParseContext context, ref ParseResult)[]); context.ExitParser(this); return true; diff --git a/src/Parlot/Fluent/When.cs b/src/Parlot/Fluent/When.cs index 8472bdc2..0be04447 100644 --- a/src/Parlot/Fluent/When.cs +++ b/src/Parlot/Fluent/When.cs @@ -1,4 +1,5 @@ using Parlot.Compilation; +using Parlot.Rewriting; using System; #if NET using System.Linq; @@ -11,7 +12,7 @@ namespace Parlot.Fluent; /// Ensure the given parser is valid based on a condition, and backtracks if not. /// /// The output parser type. -public sealed class When : Parser, ICompilable +public sealed class When : Parser, ICompilable, ISeekable { private readonly Func _action; private readonly Parser _parser; @@ -21,14 +22,32 @@ public When(Parser parser, Func action) { _action = action != null ? (c, t) => action(t) : throw new ArgumentNullException(nameof(action)); _parser = parser ?? throw new ArgumentNullException(nameof(parser)); + InitializeSeekable(); } public When(Parser parser, Func action) { _action = action ?? throw new ArgumentNullException(nameof(action)); _parser = parser ?? throw new ArgumentNullException(nameof(parser)); + InitializeSeekable(); } + private void InitializeSeekable() + { + if (_parser is ISeekable seekable) + { + CanSeek = seekable.CanSeek; + ExpectedChars = seekable.ExpectedChars; + SkipWhitespace = seekable.SkipWhitespace; + } + } + + public bool CanSeek { get; private set; } + + public char[] ExpectedChars { get; private set; } = []; + + public bool SkipWhitespace { get; private set; } + public override bool Parse(ParseContext context, ref ParseResult result) { context.EnterParser(this); diff --git a/src/Parlot/Fluent/ZeroOrMany.cs b/src/Parlot/Fluent/ZeroOrMany.cs index a875342e..c2771ec1 100644 --- a/src/Parlot/Fluent/ZeroOrMany.cs +++ b/src/Parlot/Fluent/ZeroOrMany.cs @@ -21,7 +21,7 @@ public override bool Parse(ParseContext context, ref ParseResult? results = null; + HybridList? results = null; var start = 0; var end = 0; @@ -36,14 +36,14 @@ public override bool Parse(ParseContext context, ref ParseResult)[]); diff --git a/src/Samples/Sql/SqlAst.cs b/src/Samples/Sql/SqlAst.cs index e6f0bff2..c59b7dd2 100644 --- a/src/Samples/Sql/SqlAst.cs +++ b/src/Samples/Sql/SqlAst.cs @@ -1,6 +1,8 @@ #nullable enable using System.Collections.Generic; +using System.Collections.Specialized; +using System.ComponentModel.Design; using System.Linq; namespace Parlot.Tests.Sql; @@ -529,8 +531,12 @@ public ParameterExpression(Identifier name, Expression? defaultValue = null) } // Identifiers -public class Identifier : ISqlNode +public sealed class Identifier : ISqlNode { + public static readonly Identifier STAR = new (["*"]); + + private string _cachedToString = null!; + public IReadOnlyList Parts { get; } public Identifier(IReadOnlyList parts) @@ -538,17 +544,8 @@ public Identifier(IReadOnlyList parts) Parts = parts; } - public Identifier(string name) : this(new[] { name }) - { - } - - public Identifier(params string[] parts) - { - Parts = parts.Where(p => !string.IsNullOrWhiteSpace(p)).ToArray(); - } - public override string ToString() { - return string.Join(".", Parts); + return _cachedToString ??= (Parts.Count == 1 ? Parts[0] : string.Join(".", Parts)); } } \ No newline at end of file diff --git a/src/Samples/Sql/SqlParser.cs b/src/Samples/Sql/SqlParser.cs index ab8445a2..4e35bcc3 100644 --- a/src/Samples/Sql/SqlParser.cs +++ b/src/Samples/Sql/SqlParser.cs @@ -70,23 +70,23 @@ static SqlParser() var numberLiteral = Terms.Decimal().Then(d => new LiteralExpression(d)); var stringLiteral = Terms.String(StringLiteralQuotes.Single) - .Then(s => new LiteralExpression(s.Span.ToString())); + .Then(s => new LiteralExpression(s.ToString())); var booleanLiteral = TRUE.Then(new LiteralExpression(true)) .Or(FALSE.Then(new LiteralExpression(false))); // Identifiers - var simpleIdentifier = Terms.Identifier() - .Or(Between(Terms.Char('['), Literals.NoneOf("]"), Terms.Char(']'))) - .Or(Between(Terms.Char('"'), Literals.NoneOf("\""), Terms.Char('"'))); + var simpleIdentifier = Terms.Identifier().Then(x => x.ToString()) + .Or(Between(Terms.Char('['), Literals.NoneOf("]"), Terms.Char(']')).Then(x => x.ToString())) + .Or(Between(Terms.Char('"'), Literals.NoneOf("\""), Terms.Char('"')).Then(x => x.ToString())); var identifier = Separated(DOT, simpleIdentifier) - .Then(parts => new Identifier(parts.Select(p => p.Span.ToString()).ToArray())); + .Then(parts => new Identifier(parts)); // Without the keywords check "FROM a WHERE" would interpret "WHERE" as an alias since "AS" is optional - var identifierNoKeywords = Separated(DOT, simpleIdentifier).When((ctx, parts) => parts.Count > 0 && !keywords.Contains(parts[0].ToString())) - .Then(parts => new Identifier(parts.Select(p => p.Span.ToString()).ToArray())); - + var identifierNoKeywords = Separated(DOT, simpleIdentifier).When((ctx, parts) => parts.Count > 0 && !keywords.Contains(parts[0])) + .Then(parts => new Identifier(parts)); + // Deferred parsers var expression = Deferred(); var selectStatement = Deferred(); @@ -212,7 +212,7 @@ static SqlParser() var columnSourceId = identifier.Then(id => new ColumnSourceIdentifier(id)); // Deferred for OVER clause components - var columnItemList = Separated(COMMA, columnItem.Or(STAR.Then(new ColumnItem(new ColumnSourceIdentifier(new Identifier("*")), null)))); + var columnItemList = Separated(COMMA, columnItem.Or(STAR.Then(new ColumnItem(new ColumnSourceIdentifier(Identifier.STAR), null)))); var orderByList = Separated(COMMA, orderByItem); var orderByClause = ORDER.AndSkip(BY).And(orderByList) @@ -365,9 +365,9 @@ static SqlParser() ); }); + // WITH clause (CTEs) - var columnNames = Separated(COMMA, simpleIdentifier) - .Then(names => names.Select(n => n.Span.ToString()).ToArray()); + var columnNames = Separated(COMMA, simpleIdentifier); var cteColumnList = Between(LPAREN, columnNames, RPAREN); @@ -378,7 +378,7 @@ static SqlParser() .Then(result => { var (name, columns, query) = result; - return new CommonTableExpression(name.ToString(), query, columns.OrSome(null)); + return new CommonTableExpression(name, query, columns.OrSome(null)); }); var cteList = Separated(COMMA, cte); @@ -418,8 +418,11 @@ static SqlParser() Statements = statementList.WithComments(comments => { - comments.WithSingleLine("--"); - comments.WithMultiLine("/*", "*/"); + comments + .WithWhiteSpaceOrNewLine() + .WithSingleLine("--") + .WithMultiLine("/*", "*/") + ; }); } diff --git a/test/Parlot.Benchmarks/SqlParserBenchmarks.cs b/test/Parlot.Benchmarks/SqlParserBenchmarks.cs new file mode 100644 index 00000000..f669ad22 --- /dev/null +++ b/test/Parlot.Benchmarks/SqlParserBenchmarks.cs @@ -0,0 +1,54 @@ +using System; +using System.Diagnostics.Tracing; +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Configs; +using BenchmarkDotNet.Diagnosers; +using BenchmarkDotNet.Jobs; +using BenchmarkDotNet.Running; +using Microsoft.Diagnostics.NETCore.Client; +using Microsoft.Diagnostics.Tracing.Parsers; +using Parlot.Tests.Sql; + +namespace Parlot.Benchmarks; + +[MemoryDiagnoser, ShortRunJob] +// [Config(typeof(CustomConfig))] +public class SqlParserBenchmarks +{ + // private class CustomConfig : ManualConfig + // { + // public CustomConfig() + // { + // AddJob(Job.ShortRun); + + // var providers = new[] + // { + // new EventPipeProvider(ClrTraceEventParser.ProviderName, EventLevel.Verbose, + // (long) ( + // ClrTraceEventParser.Keywords.GCAllObjectAllocation + // )) + // }; + + // AddDiagnoser(new EventPipeProfiler(providers: providers)); + // } + // } + + [Params( + "select a where a not like '%foo%'" + // "select a where b = (select Avg(c) from d)" + )] + public string Sql { get; set; } = string.Empty; + + [Benchmark] + public bool ParseExpression() + { + var result = SqlParser.TryParse(Sql, out var statementList, out var error); + + if (statementList is null || error is not null) + { + throw new InvalidOperationException($"Parsing failed: {error}"); + } + + return result; + } +} diff --git a/test/Parlot.Tests/CommentTests.cs b/test/Parlot.Tests/CommentTests.cs index c1e51068..452a0e63 100644 --- a/test/Parlot.Tests/CommentTests.cs +++ b/test/Parlot.Tests/CommentTests.cs @@ -101,6 +101,7 @@ public void ShouldParseAllComments(string text) var comments = Terms.Text("hello").And(Terms.Text("world")) .WithComments(builder => { + builder.WithWhiteSpaceOrNewLine(); builder.WithSingleLine("--"); builder.WithSingleLine("#"); builder.WithMultiLine("/*", "*/"); diff --git a/test/Parlot.Tests/HybridListTests.cs b/test/Parlot.Tests/HybridListTests.cs new file mode 100644 index 00000000..c03c138d --- /dev/null +++ b/test/Parlot.Tests/HybridListTests.cs @@ -0,0 +1,299 @@ +using Parlot.Fluent; +using System.Collections.Generic; +using Xunit; + +#nullable enable + +namespace Parlot.Tests; + +public class HybridListTests +{ + [Fact] + public void Empty_InitiallyEmpty() + { + var list = new HybridList(); + Assert.Empty(list); + } + + [Fact] + public void Add_SingleItem() + { + var list = new HybridList(); + list.Add(1); + + Assert.Single(list); + Assert.Equal(1, list[0]); + } + + [Fact] + public void Add_FourItems_InlineStorage() + { + var list = new HybridList(); + list.Add(1); + list.Add(2); + list.Add(3); + list.Add(4); + + Assert.Equal(4, list.Count); + Assert.Equal(1, list[0]); + Assert.Equal(2, list[1]); + Assert.Equal(3, list[2]); + Assert.Equal(4, list[3]); + } + + [Fact] + public void Add_FiveItems_TransitionsToList() + { + var list = new HybridList(); + list.Add(1); + list.Add(2); + list.Add(3); + list.Add(4); + list.Add(5); + + Assert.Equal(5, list.Count); + Assert.Equal(1, list[0]); + Assert.Equal(2, list[1]); + Assert.Equal(3, list[2]); + Assert.Equal(4, list[3]); + Assert.Equal(5, list[4]); + } + + [Fact] + public void Add_ManyItems_AfterTransition() + { + var list = new HybridList(); + for (int i = 1; i <= 10; i++) + { + list.Add(i); + } + + Assert.Equal(10, list.Count); + for (int i = 0; i < 10; i++) + { + Assert.Equal(i + 1, list[i]); + } + } + + [Fact] + public void Indexer_OutOfRange_ThrowsException() + { + var list = new HybridList(); + list.Add(1); + + Assert.Throws(() => list[-1]); + Assert.Throws(() => list[1]); + } + + [Fact] + public void Indexer_OutOfRange_AfterTransition_ThrowsException() + { + var list = new HybridList(); + for (int i = 1; i <= 5; i++) + { + list.Add(i); + } + + Assert.Throws(() => list[-1]); + Assert.Throws(() => list[5]); + } + + [Fact] + public void Enumerate_InlineStorage() + { + var list = new HybridList(); + list.Add(10); + list.Add(20); + list.Add(30); + + var items = new List(); + foreach (var item in list) + { + items.Add(item); + } + + Assert.Equal(3, items.Count); + Assert.Equal(10, items[0]); + Assert.Equal(20, items[1]); + Assert.Equal(30, items[2]); + } + + [Fact] + public void Enumerate_AfterTransition() + { + var list = new HybridList(); + for (int i = 1; i <= 7; i++) + { + list.Add(i); + } + + var items = new List(); + foreach (var item in list) + { + items.Add(item); + } + + Assert.Equal(7, items.Count); + for (int i = 0; i < 7; i++) + { + Assert.Equal(i + 1, items[i]); + } + } + + [Fact] + public void GetEnumerator_InlineStorage_Empty() + { + var list = new HybridList(); + + var items = new List(); + foreach (var item in list) + { + items.Add(item); + } + + Assert.Empty(items); + } + + [Fact] + public void Add_WithStrings() + { + var list = new HybridList(); + list.Add("alpha"); + list.Add("beta"); + list.Add("gamma"); + + Assert.Equal(3, list.Count); + Assert.Equal("alpha", list[0]); + Assert.Equal("beta", list[1]); + Assert.Equal("gamma", list[2]); + } + + [Fact] + public void Add_WithNullValues() + { + var list = new HybridList(); + list.Add(null); + list.Add("value"); + list.Add(null); + + Assert.Equal(3, list.Count); + Assert.Null(list[0]); + Assert.Equal("value", list[1]); + Assert.Null(list[2]); + } + + [Fact] + public void Enumerate_WithNullValues() + { + var list = new HybridList(); + list.Add(null); + list.Add("a"); + list.Add(null); + list.Add("b"); + list.Add(null); + + var items = new List(); + foreach (var item in list) + { + items.Add(item); + } + + Assert.Equal(5, items.Count); + Assert.Null(items[0]); + Assert.Equal("a", items[1]); + Assert.Null(items[2]); + Assert.Equal("b", items[3]); + Assert.Null(items[4]); + } + + [Fact] + public void Add_LargeCount() + { + var list = new HybridList(); + const int count = 1000; + + for (int i = 0; i < count; i++) + { + list.Add(i); + } + + Assert.Equal(count, list.Count); + for (int i = 0; i < count; i++) + { + Assert.Equal(i, list[i]); + } + } + + [Fact] + public void Enumerate_LargeCount() + { + var list = new HybridList(); + const int count = 1000; + + for (int i = 0; i < count; i++) + { + list.Add(i); + } + + var index = 0; + foreach (var item in list) + { + Assert.Equal(index, item); + index++; + } + + Assert.Equal(count, index); + } + + [Fact] + public void IReadOnlyListInterface() + { + IReadOnlyList list = new HybridList(); + ((HybridList)(object)list).Add(1); + ((HybridList)(object)list).Add(2); + ((HybridList)(object)list).Add(3); + + Assert.Equal(3, list.Count); + Assert.Equal(1, list[0]); + Assert.Equal(2, list[1]); + Assert.Equal(3, list[2]); + + var items = new List(); + foreach (var item in list) + { + items.Add(item); + } + Assert.Equal(3, items.Count); + } + + [Fact] + public void TransitionPoint_ExactlyAtFourItems() + { + var list = new HybridList(); + list.Add(100); + list.Add(200); + list.Add(300); + list.Add(400); + + // Should still use inline storage + Assert.Equal(4, list.Count); + Assert.Equal(100, list[0]); + Assert.Equal(400, list[3]); + } + + [Fact] + public void TransitionPoint_JustAfterFourItems() + { + var list = new HybridList(); + list.Add(100); + list.Add(200); + list.Add(300); + list.Add(400); + list.Add(500); + + // Should have transitioned to List + Assert.Equal(5, list.Count); + Assert.Equal(100, list[0]); + Assert.Equal(500, list[4]); + } +} diff --git a/test/Parlot.Tests/Parlot.Tests.csproj b/test/Parlot.Tests/Parlot.Tests.csproj index a22f1fc1..3db2ff0d 100644 --- a/test/Parlot.Tests/Parlot.Tests.csproj +++ b/test/Parlot.Tests/Parlot.Tests.csproj @@ -12,6 +12,10 @@ + + + +