diff --git a/src/Parlot/Scanner.cs b/src/Parlot/Scanner.cs
index 5432ca9..b22fa31 100644
--- a/src/Parlot/Scanner.cs
+++ b/src/Parlot/Scanner.cs
@@ -1,741 +1,757 @@
-using System;
-
-using Parlot.Fluent;
-
-using System.Linq;
-
-#if NET8_0_OR_GREATER
-using System.Buffers;
-#endif
-using System.Runtime.CompilerServices;
-
-namespace Parlot;
-
-///
-/// This class is used to return tokens extracted from the input buffer.
-///
-public class Scanner
-{
- public readonly string Buffer;
- public readonly Cursor Cursor;
-
- ///
- /// Scans some text.
- ///
- /// The string containing the text to scan.
- public Scanner(string buffer)
- {
- Buffer = buffer ?? throw new ArgumentNullException(nameof(buffer));
- Cursor = new Cursor(Buffer, TextPosition.Start);
- }
-
- ///
- /// Reads any whitespace without generating a token.
- ///
- /// Whether some white space was read.
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool SkipWhiteSpaceOrNewLine()
- {
- if (!Character.IsWhiteSpaceOrNewLine(Cursor.Current))
- {
- return false;
- }
-
- var span = Cursor.Span;
- var length = span.Length;
-
- for (var i = 1; i < length; i++)
- {
- var c = span[i];
-
- if (!Character.IsWhiteSpaceOrNewLine(c))
- {
- Cursor.Advance(i);
- return true;
- }
- }
-
- Cursor.Advance(span.Length);
- return true;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool SkipWhiteSpace()
- {
- if (!Character.IsWhiteSpace(Cursor.Current))
- {
- return false;
- }
-
- var span = Cursor.Span;
- var length = span.Length;
-
- for (var i = 1; i < length; i++)
- {
- var c = span[i];
-
- if (!Character.IsWhiteSpace(c))
- {
- if (i > 0)
- {
- Cursor.AdvanceNoNewLines(i);
- return true;
- }
-
- return false;
- }
- }
-
- Cursor.AdvanceNoNewLines(span.Length);
- return true;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadFirstThenOthers(Func first, Func other)
- => ReadFirstThenOthers(first, other, out _);
-
- public bool ReadFirstThenOthers(Func first, Func other, out ReadOnlySpan result)
- {
- if (!first(Cursor.Current))
- {
- result = [];
- return false;
- }
-
- var start = Cursor.Offset;
-
- // At this point we have an identifier, read while it's an identifier part.
-
- Cursor.Advance();
-
- ReadWhile(other, out _);
-
- result = Buffer.AsSpan(start, Cursor.Offset - start);
-
- return true;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadIdentifier() => ReadIdentifier(out _);
-
- public bool ReadIdentifier(out ReadOnlySpan result)
- {
- // perf: using Character.IsIdentifierStart instead of x => Character.IsIdentifierStart(x) induces some allocations
-
- return ReadFirstThenOthers(static x => Character.IsIdentifierStart(x), static x => Character.IsIdentifierPart(x), out result);
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadDecimal() => ReadDecimal(out _);
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadDecimal(out ReadOnlySpan number) => ReadDecimal(true, true, false, true, out number);
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadDecimal(NumberOptions numberOptions, out ReadOnlySpan number, char decimalSeparator = '.', char groupSeparator = ',')
- {
- return ReadDecimal(
- (numberOptions & NumberOptions.AllowLeadingSign) != 0,
- (numberOptions & NumberOptions.AllowDecimalSeparator) != 0,
- (numberOptions & NumberOptions.AllowGroupSeparators) != 0,
- (numberOptions & NumberOptions.AllowExponent) != 0,
- out number,
- decimalSeparator,
- groupSeparator);
- }
-
- public bool ReadDecimal(bool allowLeadingSign, bool allowDecimalSeparator, bool allowGroupSeparator, bool allowExponent, out ReadOnlySpan number, char decimalSeparator = '.', char groupSeparator = ',')
- {
- var start = Cursor.Position;
-
- if (allowLeadingSign)
- {
- if (Cursor.Current is '-' or '+')
- {
- Cursor.AdvanceNoNewLines(1);
- }
- }
-
- if (!ReadInteger(out number))
- {
- // If there is no number, check if the decimal separator is allowed and present, otherwise fail
-
- if (!allowDecimalSeparator || Cursor.Current != decimalSeparator)
- {
- Cursor.ResetPosition(start);
- return false;
- }
- }
-
- // Number can be empty if we have a decimal separator directly, in this case don't expect group separators
- if (!number.IsEmpty && allowGroupSeparator && Cursor.Current == groupSeparator)
- {
- var savedCursor = Cursor.Position;
- // Group separators can be repeated as many times
- while (true)
- {
- if (Cursor.Current == groupSeparator)
- {
- Cursor.AdvanceNoNewLines(1);
- }
- else if (!ReadInteger())
- {
- // it was not a group separator, really, so go back where the symbol was and stop
- Cursor.ResetPosition(savedCursor);
- break;
+using System;
+using Parlot.Fluent;
+using System.Linq;
+
+#if NET8_0_OR_GREATER
+using System.Buffers;
+#endif
+using System.Runtime.CompilerServices;
+
+namespace Parlot;
+
+///
+/// This class is used to return tokens extracted from the input buffer.
+///
+public class Scanner
+{
+ public readonly string Buffer;
+ public readonly Cursor Cursor;
+
+ ///
+ /// Scans some text.
+ ///
+ /// The string containing the text to scan.
+ public Scanner(string buffer)
+ {
+ Buffer = buffer ?? throw new ArgumentNullException(nameof(buffer));
+ Cursor = new Cursor(Buffer, TextPosition.Start);
+ }
+
+ ///
+ /// Reads any whitespace without generating a token.
+ ///
+ /// Whether some white space was read.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool SkipWhiteSpaceOrNewLine()
+ {
+ if (!Character.IsWhiteSpaceOrNewLine(Cursor.Current))
+ {
+ return false;
+ }
+
+ var span = Cursor.Span;
+ var length = span.Length;
+
+ for (var i = 1; i < length; i++)
+ {
+ var c = span[i];
+
+ if (!Character.IsWhiteSpaceOrNewLine(c))
+ {
+ Cursor.Advance(i);
+ return true;
+ }
+ }
+
+ Cursor.Advance(span.Length);
+ return true;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool SkipWhiteSpace()
+ {
+ if (!Character.IsWhiteSpace(Cursor.Current))
+ {
+ return false;
+ }
+
+ var span = Cursor.Span;
+ var length = span.Length;
+
+ for (var i = 1; i < length; i++)
+ {
+ var c = span[i];
+
+ if (!Character.IsWhiteSpace(c))
+ {
+ if (i > 0)
+ {
+ Cursor.AdvanceNoNewLines(i);
+ return true;
+ }
+
+ return false;
+ }
+ }
+
+ Cursor.AdvanceNoNewLines(span.Length);
+ return true;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadFirstThenOthers(Func first, Func other)
+ => ReadFirstThenOthers(first, other, out _);
+
+ public bool ReadFirstThenOthers(Func first, Func other, out ReadOnlySpan result)
+ {
+ if (!first(Cursor.Current))
+ {
+ result = [];
+ return false;
+ }
+
+ var start = Cursor.Offset;
+
+ // At this point we have an identifier, read while it's an identifier part.
+
+ Cursor.Advance();
+
+ ReadWhile(other, out _);
+
+ result = Buffer.AsSpan(start, Cursor.Offset - start);
+
+ return true;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadIdentifier() => ReadIdentifier(out _);
+
+ public bool ReadIdentifier(out ReadOnlySpan result)
+ {
+ // perf: using Character.IsIdentifierStart instead of x => Character.IsIdentifierStart(x) induces some allocations
+
+ return ReadFirstThenOthers(static x => Character.IsIdentifierStart(x), static x => Character.IsIdentifierPart(x), out result);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadDecimal() => ReadDecimal(out _);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadDecimal(out ReadOnlySpan number) => ReadDecimal(true, true, false, true, out number);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadDecimal(NumberOptions numberOptions, out ReadOnlySpan number, char decimalSeparator = '.', char groupSeparator = ',')
+ {
+ return ReadDecimal(
+ (numberOptions & NumberOptions.AllowLeadingSign) != 0,
+ (numberOptions & NumberOptions.AllowDecimalSeparator) != 0,
+ (numberOptions & NumberOptions.AllowGroupSeparators) != 0,
+ (numberOptions & NumberOptions.AllowExponent) != 0,
+ out number,
+ decimalSeparator,
+ groupSeparator);
+ }
+
+ public bool ReadDecimal(bool allowLeadingSign, bool allowDecimalSeparator, bool allowGroupSeparator, bool allowExponent, out ReadOnlySpan number, char decimalSeparator = '.', char groupSeparator = ',')
+ {
+ // The buffer is read while the value is a valid decimal number. For instance `123,a` will return `123`.
+
+ var start = Cursor.Position;
+
+ if (allowLeadingSign)
+ {
+ if (Cursor.Current is '-' or '+')
+ {
+ Cursor.AdvanceNoNewLines(1);
+ }
+ }
+
+ if (!ReadInteger(out number))
+ {
+ // If there is no number, check if the decimal separator is allowed and present, otherwise fail
+ if (!allowDecimalSeparator || Cursor.Current != decimalSeparator)
+ {
+ Cursor.ResetPosition(start);
+ return false;
+ }
+ }
+
+ // Number can be empty if we have a decimal separator directly, in this case don't expect group separators
+ if (!number.IsEmpty && allowGroupSeparator && Cursor.Current == groupSeparator)
+ {
+ var beforeGroupPosition = Cursor.Position;
+
+ // Group separators can be repeated as many times
+ while (true)
+ {
+ if (Cursor.Current == groupSeparator)
+ {
+ Cursor.AdvanceNoNewLines(1);
+ }
+ else if (!ReadInteger())
+ {
+ // it was not a group separator so go back where the symbol was and stop
+ Cursor.ResetPosition(beforeGroupPosition);
+ break;
}
else
{
- savedCursor = Cursor.Position;
- }
- }
- }
-
- if (allowDecimalSeparator)
- {
- if (Cursor.Current == decimalSeparator)
- {
- Cursor.AdvanceNoNewLines(1);
-
- ReadInteger(out number);
- }
- }
-
- if (allowExponent && (Cursor.Current is 'e' or 'E'))
- {
- Cursor.AdvanceNoNewLines(1);
-
- if (Cursor.Current is '-' or '+')
- {
- Cursor.AdvanceNoNewLines(1);
- }
-
- // The exponent must be followed by a number, without a group separator
- if (!ReadInteger(out _))
- {
- Cursor.ResetPosition(start);
- return false;
- }
- }
-
- number = Cursor.Buffer.AsSpan(start.Offset, Cursor.Offset - start.Offset);
- return true;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadInteger() => ReadInteger(out _);
-
-#if NET8_0_OR_GREATER
- public bool ReadInteger(out ReadOnlySpan result)
- {
- var span = Cursor.Span;
-
- var noDigitIndex = span.IndexOfAnyExcept(Character._decimalDigits);
-
- // If first char is not a digit, fail
- if (noDigitIndex == 0 || span.IsEmpty)
- {
- result = [];
- return false;
- }
-
- // If all chars are digits
- if (noDigitIndex == -1)
- {
- result = span;
- }
- else
- {
- result = span[..noDigitIndex];
- }
-
- Cursor.AdvanceNoNewLines(result.Length);
-
- return true;
- }
-#else
- public bool ReadInteger(out ReadOnlySpan result)
- {
- var next = 0;
- while (Character.IsDecimalDigit(Cursor.PeekNext(next)))
- {
- next += 1;
- }
-
- // Not digit was read
- if (next == 0)
- {
- result = [];
- return false;
- }
-
- Cursor.AdvanceNoNewLines(next);
- result = Buffer.AsSpan(Cursor.Offset - next, next);
-
- return true;
- }
-#endif
-
- ///
- /// Reads a token while the specific predicate is valid.
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadWhile(Func predicate) => ReadWhile(predicate, out _);
-
- ///
- /// Reads a token while the specific predicate is valid.
- ///
- public bool ReadWhile(Func predicate, out ReadOnlySpan result)
- {
- if (Cursor.Eof || !predicate(Cursor.Current))
- {
- result = [];
- return false;
- }
-
- var start = Cursor.Offset;
-
- Cursor.Advance();
-
- while (!Cursor.Eof && predicate(Cursor.Current))
- {
- Cursor.Advance();
- }
-
- result = Buffer.AsSpan(start, Cursor.Offset - start);
-
- return true;
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadNonWhiteSpace() => ReadNonWhiteSpace(out _);
-
- public bool ReadNonWhiteSpace(out ReadOnlySpan result)
- {
- return ReadWhile(static x => !Character.IsWhiteSpace(x), out result);
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadNonWhiteSpaceOrNewLine() => ReadNonWhiteSpaceOrNewLine(out _);
-
- public bool ReadNonWhiteSpaceOrNewLine(out ReadOnlySpan result)
- {
- return ReadWhile(static x => !Character.IsWhiteSpaceOrNewLine(x), out result);
- }
-
- ///
- /// Reads the specified text.
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadChar(char c)
- {
- if (!Cursor.Match(c))
- {
- return false;
- }
-
- Cursor.Advance();
- return true;
- }
-
- ///
- /// Reads the specified text.
- ///
- public bool ReadChar(char c, out ReadOnlySpan result)
- {
- if (!Cursor.Match(c))
- {
- result = [];
- return false;
- }
-
- var start = Cursor.Offset;
- Cursor.Advance();
-
- result = Buffer.AsSpan(start, Cursor.Offset - start);
- return true;
- }
-
- ///
- /// Reads the specific expected text.
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadText(ReadOnlySpan text, StringComparison comparisonType) => ReadText(text, comparisonType, out _);
-
- ///
- /// Reads the specific expected text.
- ///
- public bool ReadText(ReadOnlySpan text, StringComparison comparisonType, out ReadOnlySpan result)
- {
- if (!Cursor.Match(text, comparisonType))
- {
- result = [];
- return false;
- }
-
- var start = Cursor.Offset;
- Cursor.Advance(text.Length);
- result = Buffer.AsSpan(start, Cursor.Offset - start);
-
- return true;
- }
-
- ///
- /// Reads the specific expected chars.
- ///
- [Obsolete("Prefer bool ReadAnyOf(ReadOnlySpan, out ReadOnlySpan)")]
- public bool ReadAnyOf(ReadOnlySpan chars, StringComparison comparisonType, out ReadOnlySpan result)
- {
- var current = Cursor.Buffer.AsSpan(Cursor.Offset, 1);
-
- var index = chars.IndexOf(current, comparisonType);
-
- if (index == -1)
- {
- result = [];
- return false;
- }
-
- var start = Cursor.Offset;
- Cursor.Advance(index + 1);
- result = Cursor.Buffer.AsSpan(start, index + 1);
-
- return true;
- }
-
- ///
- /// Reads the specific expected chars.
- ///
- public bool ReadAnyOf(ReadOnlySpan chars, out ReadOnlySpan result)
- {
- var start = Cursor.Offset;
-
- while (true)
- {
- var current = Cursor.Current;
- var index = chars.IndexOf(current);
-
- if (index == -1)
- {
- if (Cursor.Offset == start)
- {
- result = [];
- return false;
- }
-
- var length = Cursor.Offset - start;
-
- result = Cursor.Buffer.AsSpan(start, length);
- return true;
- }
-
- if (Cursor.Eof)
- {
- result = [];
- return false;
- }
-
- Cursor.Advance(1);
- }
- }
-
-#if NET8_0_OR_GREATER
- ///
- /// Reads the specific expected chars.
- ///
- ///
- /// This overload uses as this shouldn't be created on every call. The actual implementation of
- /// is chosen based on the constituents of the list. The caller should thus reuse the instance.
- ///
- public bool ReadAnyOf(SearchValues values, out ReadOnlySpan result)
- {
- var span = Cursor.Span;
-
- var notInRangeIndex = span.IndexOfAnyExcept(values);
-
- // If first char is not in range
- if (notInRangeIndex == 0 || span.IsEmpty)
- {
- result = [];
- return false;
- }
-
- // All chars match
- if (notInRangeIndex == -1)
- {
- result = span;
- }
- else
- {
- result = span[..notInRangeIndex];
- }
-
- Cursor.Advance(result.Length);
-
- return true;
- }
-#endif
-
- ///
- /// Reads the specific expected text.
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadText(ReadOnlySpan text) => ReadText(text, out _);
-
- ///
- /// Reads the specific expected text.
- ///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadText(ReadOnlySpan text, out ReadOnlySpan result) => ReadText(text, comparisonType: StringComparison.Ordinal, out result);
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadSingleQuotedString() => ReadSingleQuotedString(out _);
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadSingleQuotedString(out ReadOnlySpan result)
- {
- return ReadQuotedString('\'', out result);
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadDoubleQuotedString() => ReadDoubleQuotedString(out _);
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadDoubleQuotedString(out ReadOnlySpan result)
- {
- return ReadQuotedString('\"', out result);
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadBacktickString() => ReadBacktickString(out _);
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadBacktickString(out ReadOnlySpan result)
- {
- return ReadQuotedString('`', out result);
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadQuotedString() => ReadQuotedString(out _);
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadQuotedString(char[] quoteChar) => ReadQuotedString(quoteChar, out _);
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public bool ReadQuotedString(char[] quoteChar, out ReadOnlySpan result)
- {
- var startChar = Cursor.Current;
-
- if (!quoteChar.Contains( startChar ))
- {
- result = [];
- return false;
- }
-
- return ReadQuotedString(startChar, out result);
- }
-
- public bool ReadQuotedString(out ReadOnlySpan result) => ReadQuotedString(['\'', '\"'],out result);
-
- ///
- /// Reads a string token enclosed in quotes or custom characters.
- ///
- ///
- /// This method doesn't escape the string, but only validates its content is syntactically correct.
- /// The resulting Span contains the original quotes.
- ///
- public bool ReadQuotedString(char quoteChar, out ReadOnlySpan result)
- {
- var startChar = Cursor.Current;
- var start = Cursor.Position;
-
- if (startChar != quoteChar)
- {
- result = [];
- return false;
- }
-
- var nextQuote = Cursor.Span.Slice(1).IndexOf(startChar);
-
- if (nextQuote == -1)
- {
- // There is no end quote, not a string
- result = [];
- return false;
- }
-
- var nextEscape = Cursor.Span.IndexOf('\\');
-
- // If the next escape is not before the next quote, we can return the string as-is
- if (nextEscape == -1 || nextEscape > nextQuote)
- {
- Cursor.Advance(nextQuote + 2); // include start quote
-
- result = Cursor.Buffer.AsSpan().Slice(start.Offset, nextQuote + 2);
- return true;
- }
-
- while (nextEscape != -1)
- {
- Cursor.Advance(nextEscape);
-
- // We can read Eof if there is an escaped quote sequence and no actual end quote, e.g. "'abc\'def"
- if (Cursor.Eof)
- {
- Cursor.ResetPosition(start);
-
- result = [];
- return false;
- }
-
- if (Cursor.Match('\\'))
- {
- Cursor.Advance();
-
- switch (Cursor.Current)
- {
- case '0':
- case '\\':
- case 'a':
- case 'b':
- case 'f':
- case 'n':
- case 'r':
- case 't':
- case 'v':
- case '\'':
- case '"':
- Cursor.Advance();
- break;
-
- case 'u':
-
- // https://stackoverflow.com/a/32175520/142772
- // exactly 4 digits
-#if NET8_0_OR_GREATER
- var allHexDigits = Cursor.Span.Length > 4 && Cursor.Span.Slice(1, 4).IndexOfAnyExcept(Character._hexDigits) == -1;
- var isValidUnicode = allHexDigits;
-
- if (!isValidUnicode)
- {
- Cursor.ResetPosition(start);
-
- result = [];
- return false;
- }
-
- // Advance the cursor by the 4 digits
- Cursor.Advance(4);
-#else
- var isValidUnicode = false;
-
- Cursor.Advance();
-
- if (!Cursor.Eof && Character.IsHexDigit(Cursor.Current))
- {
- Cursor.Advance();
- if (!Cursor.Eof && Character.IsHexDigit(Cursor.Current))
- {
- Cursor.Advance();
- if (!Cursor.Eof && Character.IsHexDigit(Cursor.Current))
- {
- Cursor.Advance();
- if (!Cursor.Eof && Character.IsHexDigit(Cursor.Current))
- {
- isValidUnicode = true;
- }
- }
- }
- }
-
- if (!isValidUnicode)
- {
- Cursor.ResetPosition(start);
-
- result = [];
- return false;
- }
-#endif
- break;
- case 'x':
-
- // At least one digits
-#if NET8_0_OR_GREATER
- var firstNonHexDigit = Cursor.Span.Length > 1 ? Cursor.Span.Slice(1).IndexOfAnyExcept(Character._hexDigits) : -1;
- var isValidHex = firstNonHexDigit > 0;
-
- if (!isValidHex)
- {
- Cursor.ResetPosition(start);
-
- result = [];
- return false;
- }
-
- // Advance the cursor for the read digits
- Cursor.Advance(firstNonHexDigit);
-#else
- var isValidHex = false;
-
- Cursor.Advance();
-
- if (!Cursor.Eof && Character.IsHexDigit(Cursor.Current))
- {
- isValidHex = true;
-
- if (!Cursor.Eof && Character.IsHexDigit(Cursor.PeekNext()))
- {
- Cursor.Advance();
-
- if (!Cursor.Eof && Character.IsHexDigit(Cursor.PeekNext()))
- {
- Cursor.Advance();
-
- if (!Cursor.Eof && Character.IsHexDigit(Cursor.PeekNext()))
- {
- Cursor.Advance();
- }
- }
- }
- }
-
- if (!isValidHex)
- {
- Cursor.ResetPosition(start);
-
- result = [];
- return false;
- }
-#endif
-
- break;
- default:
- Cursor.ResetPosition(start);
-
- result = [];
- return false;
- }
- }
-
- nextEscape = Cursor.Span.IndexOfAny('\\', startChar);
-
- if (Cursor.Match(startChar))
- {
- // Read end quote
- Cursor.Advance(1);
- break;
- }
- else if (nextEscape == -1)
- {
- Cursor.ResetPosition(start);
-
- result = [];
- return false;
- }
- }
-
- result = Cursor.Buffer.AsSpan()[start.Offset..Cursor.Offset];
-
- return true;
- }
-}
+ beforeGroupPosition = Cursor.Position;
+ }
+ }
+ }
+
+ var beforeDecimalSeparator = Cursor.Position;
+
+ if (allowDecimalSeparator && Cursor.Current == decimalSeparator)
+ {
+ Cursor.AdvanceNoNewLines(1);
+
+ var numberIsEmpty = number.IsEmpty;
+
+ if (!ReadInteger(out number))
+ {
+ Cursor.ResetPosition(beforeDecimalSeparator);
+
+ // A decimal separator must be followed by a number if there is no integral part, e.g. `[NaN].[NaN]`
+ if (numberIsEmpty)
+ {
+ return false;
+ }
+
+ number = Cursor.Buffer.AsSpan(start.Offset, Cursor.Offset - start.Offset);
+ return true;
+ }
+ }
+
+ var beforeExponent = Cursor.Position;
+
+ if (allowExponent && (Cursor.Current is 'e' or 'E'))
+ {
+ Cursor.AdvanceNoNewLines(1);
+
+ if (Cursor.Current is '-' or '+')
+ {
+ Cursor.AdvanceNoNewLines(1);
+ }
+
+ // The exponent must be followed by a number, without a group separator, otherwise backtrack to before the exponent
+ if (!ReadInteger(out _))
+ {
+ Cursor.ResetPosition(beforeExponent);
+ number = Cursor.Buffer.AsSpan(start.Offset, Cursor.Offset - start.Offset);
+ return true;
+ }
+ }
+
+ number = Cursor.Buffer.AsSpan(start.Offset, Cursor.Offset - start.Offset);
+ return true;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadInteger() => ReadInteger(out _);
+
+#if NET8_0_OR_GREATER
+ public bool ReadInteger(out ReadOnlySpan result)
+ {
+ var span = Cursor.Span;
+
+ var noDigitIndex = span.IndexOfAnyExcept(Character._decimalDigits);
+
+ // If first char is not a digit, fail
+ if (noDigitIndex == 0 || span.IsEmpty)
+ {
+ result = [];
+ return false;
+ }
+
+ // If all chars are digits
+ if (noDigitIndex == -1)
+ {
+ result = span;
+ }
+ else
+ {
+ result = span[..noDigitIndex];
+ }
+
+ Cursor.AdvanceNoNewLines(result.Length);
+
+ return true;
+ }
+#else
+ public bool ReadInteger(out ReadOnlySpan result)
+ {
+ var next = 0;
+ while (Character.IsDecimalDigit(Cursor.PeekNext(next)))
+ {
+ next += 1;
+ }
+
+ // Not digit was read
+ if (next == 0)
+ {
+ result = [];
+ return false;
+ }
+
+ Cursor.AdvanceNoNewLines(next);
+ result = Buffer.AsSpan(Cursor.Offset - next, next);
+
+ return true;
+ }
+#endif
+
+ ///
+ /// Reads a token while the specific predicate is valid.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadWhile(Func predicate) => ReadWhile(predicate, out _);
+
+ ///
+ /// Reads a token while the specific predicate is valid.
+ ///
+ public bool ReadWhile(Func predicate, out ReadOnlySpan result)
+ {
+ if (Cursor.Eof || !predicate(Cursor.Current))
+ {
+ result = [];
+ return false;
+ }
+
+ var start = Cursor.Offset;
+
+ Cursor.Advance();
+
+ while (!Cursor.Eof && predicate(Cursor.Current))
+ {
+ Cursor.Advance();
+ }
+
+ result = Buffer.AsSpan(start, Cursor.Offset - start);
+
+ return true;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadNonWhiteSpace() => ReadNonWhiteSpace(out _);
+
+ public bool ReadNonWhiteSpace(out ReadOnlySpan result)
+ {
+ return ReadWhile(static x => !Character.IsWhiteSpace(x), out result);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadNonWhiteSpaceOrNewLine() => ReadNonWhiteSpaceOrNewLine(out _);
+
+ public bool ReadNonWhiteSpaceOrNewLine(out ReadOnlySpan result)
+ {
+ return ReadWhile(static x => !Character.IsWhiteSpaceOrNewLine(x), out result);
+ }
+
+ ///
+ /// Reads the specified text.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadChar(char c)
+ {
+ if (!Cursor.Match(c))
+ {
+ return false;
+ }
+
+ Cursor.Advance();
+ return true;
+ }
+
+ ///
+ /// Reads the specified text.
+ ///
+ public bool ReadChar(char c, out ReadOnlySpan result)
+ {
+ if (!Cursor.Match(c))
+ {
+ result = [];
+ return false;
+ }
+
+ var start = Cursor.Offset;
+ Cursor.Advance();
+
+ result = Buffer.AsSpan(start, Cursor.Offset - start);
+ return true;
+ }
+
+ ///
+ /// Reads the specific expected text.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadText(ReadOnlySpan text, StringComparison comparisonType) => ReadText(text, comparisonType, out _);
+
+ ///
+ /// Reads the specific expected text.
+ ///
+ public bool ReadText(ReadOnlySpan text, StringComparison comparisonType, out ReadOnlySpan result)
+ {
+ if (!Cursor.Match(text, comparisonType))
+ {
+ result = [];
+ return false;
+ }
+
+ var start = Cursor.Offset;
+ Cursor.Advance(text.Length);
+ result = Buffer.AsSpan(start, Cursor.Offset - start);
+
+ return true;
+ }
+
+ ///
+ /// Reads the specific expected chars.
+ ///
+ [Obsolete("Prefer bool ReadAnyOf(ReadOnlySpan, out ReadOnlySpan)")]
+ public bool ReadAnyOf(ReadOnlySpan chars, StringComparison comparisonType, out ReadOnlySpan result)
+ {
+ var current = Cursor.Buffer.AsSpan(Cursor.Offset, 1);
+
+ var index = chars.IndexOf(current, comparisonType);
+
+ if (index == -1)
+ {
+ result = [];
+ return false;
+ }
+
+ var start = Cursor.Offset;
+ Cursor.Advance(index + 1);
+ result = Cursor.Buffer.AsSpan(start, index + 1);
+
+ return true;
+ }
+
+ ///
+ /// Reads the specific expected chars.
+ ///
+ public bool ReadAnyOf(ReadOnlySpan chars, out ReadOnlySpan result)
+ {
+ var start = Cursor.Offset;
+
+ while (true)
+ {
+ var current = Cursor.Current;
+ var index = chars.IndexOf(current);
+
+ if (index == -1)
+ {
+ if (Cursor.Offset == start)
+ {
+ result = [];
+ return false;
+ }
+
+ var length = Cursor.Offset - start;
+
+ result = Cursor.Buffer.AsSpan(start, length);
+ return true;
+ }
+
+ if (Cursor.Eof)
+ {
+ result = [];
+ return false;
+ }
+
+ Cursor.Advance(1);
+ }
+ }
+
+#if NET8_0_OR_GREATER
+ ///
+ /// Reads the specific expected chars.
+ ///
+ ///
+ /// This overload uses as this shouldn't be created on every call. The actual implementation of
+ /// is chosen based on the constituents of the list. The caller should thus reuse the instance.
+ ///
+ public bool ReadAnyOf(SearchValues values, out ReadOnlySpan result)
+ {
+ var span = Cursor.Span;
+
+ var notInRangeIndex = span.IndexOfAnyExcept(values);
+
+ // If first char is not in range
+ if (notInRangeIndex == 0 || span.IsEmpty)
+ {
+ result = [];
+ return false;
+ }
+
+ // All chars match
+ if (notInRangeIndex == -1)
+ {
+ result = span;
+ }
+ else
+ {
+ result = span[..notInRangeIndex];
+ }
+
+ Cursor.Advance(result.Length);
+
+ return true;
+ }
+#endif
+
+ ///
+ /// Reads the specific expected text.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadText(ReadOnlySpan text) => ReadText(text, out _);
+
+ ///
+ /// Reads the specific expected text.
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadText(ReadOnlySpan text, out ReadOnlySpan result) => ReadText(text, comparisonType: StringComparison.Ordinal, out result);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadSingleQuotedString() => ReadSingleQuotedString(out _);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadSingleQuotedString(out ReadOnlySpan result)
+ {
+ return ReadQuotedString('\'', out result);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadDoubleQuotedString() => ReadDoubleQuotedString(out _);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadDoubleQuotedString(out ReadOnlySpan result)
+ {
+ return ReadQuotedString('\"', out result);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadBacktickString() => ReadBacktickString(out _);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadBacktickString(out ReadOnlySpan result)
+ {
+ return ReadQuotedString('`', out result);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadQuotedString() => ReadQuotedString(out _);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadQuotedString(char[] quoteChar) => ReadQuotedString(quoteChar, out _);
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public bool ReadQuotedString(char[] quoteChar, out ReadOnlySpan result)
+ {
+ var startChar = Cursor.Current;
+
+ if (!quoteChar.Contains( startChar ))
+ {
+ result = [];
+ return false;
+ }
+
+ return ReadQuotedString(startChar, out result);
+ }
+
+ public bool ReadQuotedString(out ReadOnlySpan result) => ReadQuotedString(['\'', '\"'],out result);
+
+ ///
+ /// Reads a string token enclosed in quotes or custom characters.
+ ///
+ ///
+ /// This method doesn't escape the string, but only validates its content is syntactically correct.
+ /// The resulting Span contains the original quotes.
+ ///
+ public bool ReadQuotedString(char quoteChar, out ReadOnlySpan result)
+ {
+ var startChar = Cursor.Current;
+ var start = Cursor.Position;
+
+ if (startChar != quoteChar)
+ {
+ result = [];
+ return false;
+ }
+
+ var nextQuote = Cursor.Span.Slice(1).IndexOf(startChar);
+
+ if (nextQuote == -1)
+ {
+ // There is no end quote, not a string
+ result = [];
+ return false;
+ }
+
+ var nextEscape = Cursor.Span.IndexOf('\\');
+
+ // If the next escape is not before the next quote, we can return the string as-is
+ if (nextEscape == -1 || nextEscape > nextQuote)
+ {
+ Cursor.Advance(nextQuote + 2); // include start quote
+
+ result = Cursor.Buffer.AsSpan().Slice(start.Offset, nextQuote + 2);
+ return true;
+ }
+
+ while (nextEscape != -1)
+ {
+ Cursor.Advance(nextEscape);
+
+ // We can read Eof if there is an escaped quote sequence and no actual end quote, e.g. "'abc\'def"
+ if (Cursor.Eof)
+ {
+ Cursor.ResetPosition(start);
+
+ result = [];
+ return false;
+ }
+
+ if (Cursor.Match('\\'))
+ {
+ Cursor.Advance();
+
+ switch (Cursor.Current)
+ {
+ case '0':
+ case '\\':
+ case 'a':
+ case 'b':
+ case 'f':
+ case 'n':
+ case 'r':
+ case 't':
+ case 'v':
+ case '\'':
+ case '"':
+ Cursor.Advance();
+ break;
+
+ case 'u':
+
+ // https://stackoverflow.com/a/32175520/142772
+ // exactly 4 digits
+#if NET8_0_OR_GREATER
+ var allHexDigits = Cursor.Span.Length > 4 && Cursor.Span.Slice(1, 4).IndexOfAnyExcept(Character._hexDigits) == -1;
+ var isValidUnicode = allHexDigits;
+
+ if (!isValidUnicode)
+ {
+ Cursor.ResetPosition(start);
+
+ result = [];
+ return false;
+ }
+
+ // Advance the cursor by the 4 digits
+ Cursor.Advance(4);
+#else
+ var isValidUnicode = false;
+
+ Cursor.Advance();
+
+ if (!Cursor.Eof && Character.IsHexDigit(Cursor.Current))
+ {
+ Cursor.Advance();
+ if (!Cursor.Eof && Character.IsHexDigit(Cursor.Current))
+ {
+ Cursor.Advance();
+ if (!Cursor.Eof && Character.IsHexDigit(Cursor.Current))
+ {
+ Cursor.Advance();
+ if (!Cursor.Eof && Character.IsHexDigit(Cursor.Current))
+ {
+ isValidUnicode = true;
+ }
+ }
+ }
+ }
+
+ if (!isValidUnicode)
+ {
+ Cursor.ResetPosition(start);
+
+ result = [];
+ return false;
+ }
+#endif
+ break;
+ case 'x':
+
+ // At least one digits
+#if NET8_0_OR_GREATER
+ var firstNonHexDigit = Cursor.Span.Length > 1 ? Cursor.Span.Slice(1).IndexOfAnyExcept(Character._hexDigits) : -1;
+ var isValidHex = firstNonHexDigit > 0;
+
+ if (!isValidHex)
+ {
+ Cursor.ResetPosition(start);
+
+ result = [];
+ return false;
+ }
+
+ // Advance the cursor for the read digits
+ Cursor.Advance(firstNonHexDigit);
+#else
+ var isValidHex = false;
+
+ Cursor.Advance();
+
+ if (!Cursor.Eof && Character.IsHexDigit(Cursor.Current))
+ {
+ isValidHex = true;
+
+ if (!Cursor.Eof && Character.IsHexDigit(Cursor.PeekNext()))
+ {
+ Cursor.Advance();
+
+ if (!Cursor.Eof && Character.IsHexDigit(Cursor.PeekNext()))
+ {
+ Cursor.Advance();
+
+ if (!Cursor.Eof && Character.IsHexDigit(Cursor.PeekNext()))
+ {
+ Cursor.Advance();
+ }
+ }
+ }
+ }
+
+ if (!isValidHex)
+ {
+ Cursor.ResetPosition(start);
+
+ result = [];
+ return false;
+ }
+#endif
+
+ break;
+ default:
+ Cursor.ResetPosition(start);
+
+ result = [];
+ return false;
+ }
+ }
+
+ nextEscape = Cursor.Span.IndexOfAny('\\', startChar);
+
+ if (Cursor.Match(startChar))
+ {
+ // Read end quote
+ Cursor.Advance(1);
+ break;
+ }
+ else if (nextEscape == -1)
+ {
+ Cursor.ResetPosition(start);
+
+ result = [];
+ return false;
+ }
+ }
+
+ result = Cursor.Buffer.AsSpan()[start.Offset..Cursor.Offset];
+
+ return true;
+ }
+}
diff --git a/test/Parlot.Tests/ScannerTests.cs b/test/Parlot.Tests/ScannerTests.cs
index 30feb30..b69e096 100644
--- a/test/Parlot.Tests/ScannerTests.cs
+++ b/test/Parlot.Tests/ScannerTests.cs
@@ -1,3 +1,4 @@
+using Parlot.Tests.Calc;
using System;
using System.Buffers;
@@ -294,6 +295,7 @@ public void ShouldNotReadInvalidInteger(string text)
[InlineData("123a", "123")]
[InlineData("123.0", "123")]
[InlineData("123.0a", "123")]
+ [InlineData("123.", "123")]
[InlineData("123 ", "123")]
public void ShouldReadValidInteger(string text, string expected)
{
@@ -303,10 +305,12 @@ public void ShouldReadValidInteger(string text, string expected)
[Theory]
[InlineData(" 1")]
- [InlineData("123.e")]
+ [InlineData("abc")]
+ [InlineData(".")]
+ [InlineData(",")]
public void ShouldNotReadInvalidDecimal(string text)
{
- Assert.False(new Scanner(text).ReadDecimal());
+ Assert.False(new Scanner(text).ReadDecimal(Fluent.NumberOptions.Any, groupSeparator: ',', decimalSeparator: '.', number: out _));
}
[Theory]
@@ -377,39 +381,34 @@ public void ShouldReadNumberWithMultipleGroupSeparators(string input, string exp
}
[Theory]
- [InlineData("123,", "123", ",")]
- [InlineData("123,a", "123", ",a")]
- public void ShouldReadNumberWithTrailingDecimalSeparators(string input, string expected, string expected2)
+ [InlineData("123", "123")]
+ [InlineData("123,123", "123,123")]
+ [InlineData("123,a", "123")]
+ [InlineData("123,123,a", "123,123")]
+ [InlineData("123,123,123", "123,123,123")]
+ [InlineData("123,.1", "123")]
+ [InlineData("123,.e", "123")]
+ [InlineData("123,e", "123")]
+ [InlineData("123,", "123")]
+ public void ShouldReadDecimalWithGroupSeparator(string input, string expected)
{
Scanner s = new(input);
- Assert.True(s.ReadDecimal(Fluent.NumberOptions.AllowDecimalSeparator, out var result));
+ Assert.True(s.ReadDecimal(Fluent.NumberOptions.AllowGroupSeparators | Fluent.NumberOptions.AllowDecimalSeparator, out var result, groupSeparator: ',', decimalSeparator: '.'));
Assert.Equal(expected, result);
- Assert.True(s.ReadNonWhiteSpace(out var result2));
- Assert.Equal(expected2, result2);
}
[Theory]
- [InlineData("1, 2, 3", "1", "2", "3")]
- public void ShouldReadNumberListWithDecimalSeparators(string input, string expected1, string expected2, string expected3)
+ [InlineData("123.456", "123.456")]
+ [InlineData("123.456a", "123.456")]
+ [InlineData("123.a", "123")]
+ [InlineData("123.456.789", "123.456")]
+ [InlineData("123.", "123")]
+ public void ShouldReadDecimalWithDecimalSeparator(string input, string expected)
{
Scanner s = new(input);
- Assert.True(s.ReadDecimal(Fluent.NumberOptions.AllowDecimalSeparator, out var result));
- Assert.Equal(expected1, result);
- Assert.True(s.ReadNonWhiteSpace(out var resultSep));
- Assert.Equal(",", resultSep);
- Assert.True(s.SkipWhiteSpace());
-
- Assert.True(s.ReadDecimal(Fluent.NumberOptions.AllowDecimalSeparator, out result));
- Assert.Equal(expected2, result);
- Assert.True(s.ReadNonWhiteSpace(out resultSep));
- Assert.Equal(",", resultSep);
- Assert.True(s.SkipWhiteSpace());
-
- Assert.True(s.ReadDecimal(Fluent.NumberOptions.AllowDecimalSeparator, out result));
- Assert.Equal(expected3, result);
+ Assert.True(s.ReadDecimal(Fluent.NumberOptions.AllowDecimalSeparator, out var result, decimalSeparator: '.'));
+ Assert.Equal(expected, result);
}
-
-
}