Skip to content

Commit

Permalink
Additional commenting and PR feedback. I've simplified the reset loop…
Browse files Browse the repository at this point in the history
… to just be from the back of the results, as that's the most common order for the parser to reset in. I've also refactored a common advance loop to reduce duplication.
  • Loading branch information
333fred committed Aug 6, 2024
1 parent 1a0d57d commit aa24f69
Showing 1 changed file with 55 additions and 82 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Diagnostics;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.Text;

using SyntaxToken = Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax.SyntaxToken;
using SyntaxFactory = Microsoft.AspNetCore.Razor.Language.Syntax.InternalSyntax.SyntaxFactory;
Expand All @@ -16,11 +17,20 @@
namespace Microsoft.AspNetCore.Razor.Language.Legacy;

#pragma warning disable RSEXPERIMENTAL003 // SyntaxTokenParser is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
internal class RoslynCSharpTokenizer : CSharpTokenizer
internal sealed class RoslynCSharpTokenizer : CSharpTokenizer
{
private readonly SyntaxTokenParser _roslynTokenParser;
private readonly List<(int position, SyntaxTokenParser.Result result)> _resultCache = new();
/// <summary>
/// The current trivia enumerator that we're parsing through. This is a tuple of the enumerator and whether it's leading trivia.
/// When this is non-null, we're in the <see cref="RoslynCSharpTokenizerState.TriviaForCSharpToken"/> state.
/// </summary>
private (CSharpSyntaxTriviaList.Enumerator enumerator, bool isLeading)? _currentCSharpTokenTriviaEnumerator;
/// <summary>
/// Previous result checkpoints that we can reset <see cref="_roslynTokenParser"/> to. This must be an ordered list
/// by position, where the position is the start of the token that was parsed including leading trivia, so that searching
/// is correct when performing a reset.
/// </summary>
private readonly List<(int position, SyntaxTokenParser.Result result)> _resultCache = [];

public RoslynCSharpTokenizer(SeekableTextReader source)
: base(source)
Expand Down Expand Up @@ -272,17 +282,10 @@ private StateResult AtToken()

private StateResult Operator()
{
var curPosition = Source.Position;
var result = GetNextResult(NextResultType.Token);
var token = result.Token;

// Don't include trailing trivia; we handle that differently than Roslyn
var finalPosition = curPosition + token.Span.Length;

for (; curPosition < finalPosition; curPosition++)
{
TakeCurrent();
}
AdvancePastToken(token);

SyntaxKind kind;
string content;
Expand Down Expand Up @@ -378,11 +381,8 @@ void TakeTokenContent(CSharpSyntaxToken token, out string content)

private StateResult TokenizedExpectedStringOrCharacterLiteral(StringOrCharacterKind expectedStringKind)
{
var curPosition = Source.Position;
var result = GetNextResult(NextResultType.Token);
var csharpToken = result.Token;
// Don't include trailing trivia; we handle that differently than Roslyn
var finalPosition = curPosition + csharpToken.Span.Length;
(string expectedPrefix, string expectedPostfix, bool lookForPrePostFix) = expectedStringKind switch
{
StringOrCharacterKind.Character => ("'", "'", false),
Expand All @@ -395,7 +395,7 @@ private StateResult TokenizedExpectedStringOrCharacterLiteral(StringOrCharacterK
_ => throw new InvalidOperationException($"Unexpected expectedStringKind: {expectedStringKind}."),
};

for (; curPosition < finalPosition; curPosition++)
for (var finalPosition = Source.Position + csharpToken.Span.Length; Source.Position < finalPosition;)
{
if (lookForPrePostFix)
{
Expand All @@ -405,7 +405,7 @@ private StateResult TokenizedExpectedStringOrCharacterLiteral(StringOrCharacterK
TakeCurrent();
}

// If the token is the expected kind and has the expected prefix or doesn't have the expected postfix, then it's unterminated.
// If the token is the expected kind and is only the expected prefix or doesn't have the expected postfix, then it's unterminated.
// This is a case like `"test` (which doesn't end in the expected postfix), or `"` (which ends in the expected postfix, but
// exactly matches the expected prefix).
if (lookForPrePostFix || csharpToken.Text == expectedPrefix || !csharpToken.Text.EndsWith(expectedPostfix!, StringComparison.Ordinal))
Expand Down Expand Up @@ -503,7 +503,6 @@ private StateResult Trivia()
// Need to make sure the class state is correct, since structs are copied
_currentCSharpTokenTriviaEnumerator = (triviaEnumerator, isLeading);

var curPosition = Source.Position;
var trivia = triviaEnumerator.Current;
var triviaString = trivia.ToFullString();

Expand All @@ -519,12 +518,7 @@ private StateResult Trivia()
}

// Use FullSpan here because doc comment trivias exclude the leading `///` or `/**` and the trailing `*/`
var finalPosition = curPosition + trivia.FullSpan.Length;

for (; curPosition < finalPosition; curPosition++)
{
TakeCurrent();
}
AdvancePastSpan(trivia.FullSpan);

if (EndOfFile
&& trivia.Kind() is CSharpSyntaxKind.MultiLineCommentTrivia or CSharpSyntaxKind.MultiLineDocumentationCommentTrivia
Expand Down Expand Up @@ -560,16 +554,9 @@ private StateResult OnRazorCommentStar()
// CSharp Spec §2.4.4
private StateResult NumericLiteral()
{
var curPosition = Source.Position;
var result = GetNextResult(NextResultType.Token);
var csharpToken = result.Token;
// Don't include trailing trivia; we handle that differently than Roslyn
var finalPosition = curPosition + csharpToken.Span.Length;

for (; curPosition < finalPosition; curPosition++)
{
TakeCurrent();
}
AdvancePastToken(csharpToken);

Buffer.Clear();
_currentCSharpTokenTriviaEnumerator = (csharpToken.TrailingTrivia.GetEnumerator(), isLeading: false);
Expand All @@ -578,16 +565,9 @@ private StateResult NumericLiteral()

private StateResult Identifier()
{
var curPosition = Source.Position;
var result = GetNextResult(NextResultType.Token);
var csharpToken = result.Token;
// Don't include trailing trivia; we handle that differently than Roslyn
var finalPosition = curPosition + csharpToken.Span.Length;

for (; curPosition < finalPosition; curPosition++)
{
TakeCurrent();
}
AdvancePastToken(csharpToken);

var type = SyntaxKind.Identifier;
if (!csharpToken.IsKind(CSharpSyntaxKind.IdentifierToken) || result.ContextualKind is not (CSharpSyntaxKind.None or CSharpSyntaxKind.IdentifierToken))
Expand All @@ -602,6 +582,22 @@ private StateResult Identifier()
return Transition(RoslynCSharpTokenizerState.TriviaForCSharpToken, token);
}

private void AdvancePastToken(CSharpSyntaxToken csharpToken)
{
// Don't include trailing trivia; we handle that differently than Roslyn
AdvancePastSpan(csharpToken.Span);
}

private void AdvancePastSpan(TextSpan span)
{
var finalPosition = Source.Position + span.Length;

for (; Source.Position < finalPosition;)
{
TakeCurrent();
}
}

private StateResult Transition(RoslynCSharpTokenizerState state, SyntaxToken? result)
{
return Transition((int)state, result);
Expand Down Expand Up @@ -662,51 +658,38 @@ public override void Reset(int position)
// Most common reset point is the last parsed token, so just try that first.
Debug.Assert(_resultCache.Count > 0);

var lastIndex = _resultCache.Count - 1;
var lastResult = _resultCache[lastIndex];
if (lastResult.position == position)
// We always walk backwards from the current position, rather than doing a binary search, because the common pattern in the parser is
// to put tokens back in the order they were returned. This means that the most common reset point is the last token that was parsed.
// If this ever changes, we can consider doing a binary search at that point.
for (var i = _resultCache.Count -1; i >= 0; i--)
{
_resultCache.RemoveAt(lastIndex);
_roslynTokenParser.ResetTo(lastResult.result);
}
else
{
// Not the last one, so binary search
var index = _resultCache.BinarySearch((position, default), ResultCacheSearcher.Instance);

if (index >= 0)
var (currentPosition, currentResult) = _resultCache[i];
if (currentPosition == position)
{
// Found an exact match
var resetResult = _resultCache[index].result;
_roslynTokenParser.ResetTo(resetResult);
_resultCache.RemoveRange(index, _resultCache.Count - index);
// We found an exact match, so we can reset to it directly.
_roslynTokenParser.ResetTo(currentResult);
_resultCache.RemoveAt(i);
base.CurrentState = (int)RoslynCSharpTokenizerState.Start;
return;
}
else
else if (currentPosition < position)
{
// Reset to the one before reset point, then skip forward
index = ~index - 1;
// We know there was at least one element in the list, so BinarySearch returned either the element with a position further ahead of where we want to reset to, or the length of the list.
// In either case, we know that index is valid.
Debug.Assert(index >= 0);

// We don't want to actually remove the result from the cache in this point: the parser could later ask to reset further back in this same result. This mostly happens for trivia, where the
// parser may ask to put multiple tokens back, each part of the same roslyn trivia piece. However, it is not _only_ for trivia, so we can't assert that. The parser may decide, for example,
// to split the @ from a token, reset the tokenizer after the @, and then keep going.
var resetResult = _resultCache[index].result;
_roslynTokenParser.ResetTo(resetResult);
_roslynTokenParser.ResetTo(currentResult);
_roslynTokenParser.SkipForwardTo(position);

if (index + 1 < _resultCache.Count)
{
// Any results further ahead than the position we reset to are no longer valid, so we want to remove them.
// We need to keep the position we reset to, just in case the parser asks to reset to it again.
_resultCache.RemoveRange(index + 1, _resultCache.Count - index - 1);
}
base.CurrentState = (int)RoslynCSharpTokenizerState.Start;
return;
}
else
{
// We know we're not going to be interested in this reset point anymore, so we can remove it.
Debug.Assert(currentPosition > position);
_resultCache.RemoveAt(i);
}

}

CurrentState = RoslynCSharpTokenizerState.Start;
}

public override void Dispose()
Expand Down Expand Up @@ -746,14 +729,4 @@ private enum RoslynCSharpTokenizerState
StarAfterRazorCommentBody = RazorCommentTokenizerState.StarAfterRazorCommentBody,
AtTokenAfterRazorCommentBody = RazorCommentTokenizerState.AtTokenAfterRazorCommentBody,
}

private sealed class ResultCacheSearcher : IComparer<(int position, SyntaxTokenParser.Result result)>
{
public static ResultCacheSearcher Instance { get; } = new ResultCacheSearcher();

public int Compare((int position, SyntaxTokenParser.Result result) x, (int position, SyntaxTokenParser.Result result) y)
{
return x.position.CompareTo(y.position);
}
}
}

0 comments on commit aa24f69

Please sign in to comment.