diff --git a/src/UglyToad.PdfPig.Core/ReadHelper.cs b/src/UglyToad.PdfPig.Core/ReadHelper.cs
index 184489fc1..4d6e9842f 100644
--- a/src/UglyToad.PdfPig.Core/ReadHelper.cs
+++ b/src/UglyToad.PdfPig.Core/ReadHelper.cs
@@ -24,12 +24,17 @@ public static class ReadHelper
///
public const byte AsciiCarriageReturn = 13;
+ /// <summary>
+ /// The tab '\t' character.
+ /// </summary>
+ public const byte AsciiTab = 9;
+
private static readonly HashSet EndOfNameCharacters =
[
' ',
AsciiCarriageReturn,
AsciiLineFeed,
- 9,
+ AsciiTab,
'>',
'<',
'[',
diff --git a/src/UglyToad.PdfPig.Core/StreamInputBytes.cs b/src/UglyToad.PdfPig.Core/StreamInputBytes.cs
index d29868741..21f215b8f 100644
--- a/src/UglyToad.PdfPig.Core/StreamInputBytes.cs
+++ b/src/UglyToad.PdfPig.Core/StreamInputBytes.cs
@@ -96,6 +96,17 @@ public bool IsAtEnd()
///
public void Seek(long position)
{
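+ // Fast paths: seeking to the current position is a no-op, and a seek of exactly one byte
+ // forward can be served by consuming the already-peeked byte instead of seeking the underlying stream.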
+ var current = CurrentOffset;
+ if (position == current)
+ {
+ return;
+ }
+ else if (peekByte.HasValue && position == current + 1)
+ {
+ MoveNext();
+ return;
+ }
+
isAtEnd = false;
peekByte = null;
diff --git a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1ArrayTokenizer.cs b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1ArrayTokenizer.cs
index eb709f04f..e1d6e30c5 100644
--- a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1ArrayTokenizer.cs
+++ b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1ArrayTokenizer.cs
@@ -14,7 +14,7 @@ public sealed class Type1ArrayTokenizer : ITokenizer
///
public bool ReadsNextByte { get; } = false;
- private static readonly string[] Space = [" "];
+ private static readonly char[] Space = [' '];
///
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
diff --git a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
index 606725be1..6b3e26e4a 100644
--- a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
+++ b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
@@ -88,6 +88,11 @@ public static Type1Font Parse(IInputBytes inputBytes, int length1, int length2)
{
int offset = 0;
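+ // Skip any leading whitespace so the byte-by-byte match against ClearToMark below
+ // starts at the first non-whitespace byte.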
+ while (inputBytes.Peek() is { } b && ReadHelper.IsWhitespace(b))
+ {
+ inputBytes.MoveNext();
+ }
+
while (inputBytes.MoveNext())
{
if (inputBytes.CurrentByte == (byte)ClearToMark[offset])
diff --git a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1Tokenizer.cs b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1Tokenizer.cs
index 333f008ed..aac323f3f 100644
--- a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1Tokenizer.cs
+++ b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1Tokenizer.cs
@@ -2,6 +2,7 @@
{
using System;
using System.Collections.Generic;
+ using System.Diagnostics;
using System.Globalization;
using System.Text;
using Core;
@@ -41,35 +42,43 @@ private Type1Token ReadNextToken()
do
{
skip = false;
- while (bytes.MoveNext())
+ while (bytes.Peek() is { } b)
{
- var b = bytes.CurrentByte;
var c = (char)b;
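+ // b has only been peeked here; each case consumes it (and any further bytes it needs) itself,
+ // so bytes belonging to the next token are never read past.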
switch (c)
{
case '%':
+ bytes.MoveNext();
comments.Add(ReadComment());
break;
case '(':
+ bytes.MoveNext();
return ReadString();
case ')':
throw new InvalidOperationException("Encountered an end of string ')' outside of string.");
case '[':
+ bytes.MoveNext();
return new Type1Token(c, Type1Token.TokenType.StartArray);
case ']':
+ bytes.MoveNext();
return new Type1Token(c, Type1Token.TokenType.EndArray);
case '{':
+ bytes.MoveNext();
return new Type1Token(c, Type1Token.TokenType.StartProc);
case '}':
+ bytes.MoveNext();
return new Type1Token(c, Type1Token.TokenType.EndProc);
case '/':
{
- var name = ReadLiteral();
+ bytes.MoveNext();
+ TryReadLiteral(out var name);
+ Debug.Assert(name != null);
return new Type1Token(name, Type1Token.TokenType.Literal);
}
case '<':
{
+ bytes.MoveNext();
var following = bytes.Peek();
if (following == '<')
{
@@ -81,6 +90,7 @@ private Type1Token ReadNextToken()
}
case '>':
{
+ bytes.MoveNext();
var following = bytes.Peek();
if (following == '>')
{
@@ -94,23 +104,24 @@ private Type1Token ReadNextToken()
{
if (ReadHelper.IsWhitespace(b))
{
+ bytes.MoveNext();
skip = true;
break;
}
if (b == 0)
{
+ bytes.MoveNext();
skip = true;
break;
}
- if (TryReadNumber(c, out var number))
+ if (TryReadNumber(out var number))
{
return number;
}
- var name = ReadLiteral(c);
- if (name == null)
+ if (!TryReadLiteral(out var name))
{
throw new InvalidOperationException($"The binary portion of the type 1 font was invalid at position {bytes.CurrentOffset}.");
}
@@ -197,12 +208,21 @@ char GetNext()
return null;
}
- private bool TryReadNumber(char c, out Type1Token numberToken)
+ private bool TryReadNumber(out Type1Token numberToken)
{
char GetNext()
{
bytes.MoveNext();
- return (char)bytes.CurrentByte;
+ return (char)(bytes.Peek() ?? 0);
+ }
+
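+ // Peek the candidate byte without consuming it, so bailing out below leaves the stream untouched.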
+ char c = (char)(bytes.Peek() ?? 0);
+
+ if (!((c >= '0' && c <= '9') || c is '+' or '-'))
+ {
+ // Early out: the byte cannot start a number.
+ numberToken = null;
+ return false;
}
numberToken = null;
@@ -251,8 +271,6 @@ char GetNext()
else
{
// integer
- bytes.Seek(bytes.CurrentOffset - 1);
-
numberToken = new Type1Token(sb.ToString(), Type1Token.TokenType.Integer);
return true;
}
@@ -309,7 +327,6 @@ char GetNext()
}
}
- bytes.Seek(bytes.CurrentOffset - 1);
if (radix != null)
{
var number = Convert.ToInt32(sb.ToString(), int.Parse(radix.ToString(), CultureInfo.InvariantCulture));
@@ -323,14 +340,9 @@ char GetNext()
return true;
}
- private string ReadLiteral(char? previousCharacter = null)
+ private bool TryReadLiteral(out string? value)
{
literalBuffer.Clear();
- if (previousCharacter.HasValue)
- {
- literalBuffer.Append(previousCharacter);
- }
-
do
{
var b = bytes.Peek();
@@ -350,8 +362,16 @@ private string ReadLiteral(char? previousCharacter = null)
literalBuffer.Append(c);
} while (bytes.MoveNext());
- var literal = literalBuffer.ToString();
- return literal.Length == 0 ? null : literal;
+ if (literalBuffer.Length > 0)
+ {
+ value = literalBuffer.ToString();
+ return true;
+ }
+ else
+ {
+ value = null;
+ return false;
+ }
}
private string ReadComment()
@@ -375,9 +395,10 @@ private string ReadComment()
private Type1DataToken ReadCharString(int length)
{
// Skip preceding space.
- bytes.MoveNext();
- // TODO: may be wrong
- // bytes.MoveNext();
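+ // With the peek-based tokenizer the separating space has not been consumed yet,
+ // so only skip it when it is actually pending.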
+ if (bytes.Peek() is { } ws && ReadHelper.IsWhitespace(ws))
+ {
+ bytes.MoveNext();
+ }
byte[] data = new byte[length];
for (int i = 0; i < length; i++)
diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/NumericTokenizerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/NumericTokenizerTests.cs
index be8a19a25..fa9654ff9 100644
--- a/src/UglyToad.PdfPig.Tests/Tokenization/NumericTokenizerTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Tokenization/NumericTokenizerTests.cs
@@ -91,7 +91,10 @@ public void OnlyParsesNumberPart()
Assert.True(result);
Assert.Equal(135.6654, AssertNumericToken(token).Data);
- Assert.Equal('/', (char)input.Bytes.CurrentByte);
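+ // A pre-reading tokenizer leaves the stream on the terminating '/';
+ // a peek-based one stays on the final digit '4'.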
+ if (tokenizer.ReadsNextByte)
+ Assert.Equal('/', (char)input.Bytes.CurrentByte);
+ else
+ Assert.Equal('4', (char)input.Bytes.CurrentByte);
}
[Fact]
diff --git a/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs
index 8c95e41ff..da1f468dd 100644
--- a/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs
@@ -9,7 +9,7 @@ internal sealed class ArrayTokenizer : ITokenizer
{
private readonly bool usePdfDocEncoding;
- public bool ReadsNextByte { get; } = false;
+ public bool ReadsNextByte => false;
public ArrayTokenizer(bool usePdfDocEncoding)
{
diff --git a/src/UglyToad.PdfPig.Tokenization/CommentTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/CommentTokenizer.cs
index cd51a0e23..25c588952 100644
--- a/src/UglyToad.PdfPig.Tokenization/CommentTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/CommentTokenizer.cs
@@ -6,7 +6,7 @@
internal sealed class CommentTokenizer : ITokenizer
{
- public bool ReadsNextByte { get; } = true;
+ public bool ReadsNextByte => false;
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
@@ -17,10 +17,11 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
return false;
}
- using var builder = new ValueStringBuilder();
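+ // Most comments are short, so start with a small stack buffer and let the builder grow only when needed.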
+ using var builder = new ValueStringBuilder(stackalloc char[32]);
- while (inputBytes.MoveNext() && !ReadHelper.IsEndOfLine(inputBytes.CurrentByte))
+ while (inputBytes.Peek() is { } c && !ReadHelper.IsEndOfLine(c))
{
+ inputBytes.MoveNext();
builder.Append((char) inputBytes.CurrentByte);
}
diff --git a/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs
index 213fdcfc2..3b75ffcc6 100644
--- a/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs
@@ -11,7 +11,7 @@ internal class DictionaryTokenizer : ITokenizer
private readonly IReadOnlyList requiredKeys;
private readonly bool useLenientParsing;
- public bool ReadsNextByte { get; } = false;
+ public bool ReadsNextByte => false;
///
/// Create a new .
diff --git a/src/UglyToad.PdfPig.Tokenization/EndOfLineTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/EndOfLineTokenizer.cs
index 7fb7d2c7a..66597f646 100644
--- a/src/UglyToad.PdfPig.Tokenization/EndOfLineTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/EndOfLineTokenizer.cs
@@ -9,7 +9,7 @@
public sealed class EndOfLineTokenizer : ITokenizer
{
///
- public bool ReadsNextByte { get; } = false;
+ public bool ReadsNextByte => false;
///
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
diff --git a/src/UglyToad.PdfPig.Tokenization/HexTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/HexTokenizer.cs
index 72fe06772..92bc16309 100644
--- a/src/UglyToad.PdfPig.Tokenization/HexTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/HexTokenizer.cs
@@ -5,7 +5,7 @@
internal sealed class HexTokenizer : ITokenizer
{
- public bool ReadsNextByte { get; } = false;
+ public bool ReadsNextByte => false;
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
diff --git a/src/UglyToad.PdfPig.Tokenization/NameTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/NameTokenizer.cs
index 1aa624471..16974167a 100644
--- a/src/UglyToad.PdfPig.Tokenization/NameTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/NameTokenizer.cs
@@ -11,14 +11,14 @@
internal sealed class NameTokenizer : ITokenizer
{
+#if NET
static NameTokenizer()
{
-#if NET
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
-#endif
}
+#endif
- public bool ReadsNextByte { get; } = true;
+ public bool ReadsNextByte => false;
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
@@ -35,10 +35,8 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
int postEscapeRead = 0;
Span<char> escapedChars = stackalloc char[2];
- while (inputBytes.MoveNext())
+ while (inputBytes.Peek() is { } b)
{
- var b = inputBytes.CurrentByte;
-
if (b == '#')
{
escapeActive = true;
@@ -52,8 +50,9 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
if (postEscapeRead == 2)
{
- int high = escapedChars[0] <= '9' ? escapedChars[0] - '0' : char.ToUpper(escapedChars[0]) - 'A' + 10;
- int low = escapedChars[1] <= '9' ? escapedChars[1] - '0' : char.ToUpper(escapedChars[1]) - 'A' + 10;
+ // The characters have already been validated as hex digits, so ASCII rules apply and we can shortcut the hex decoding.
+ int high = escapedChars[0] <= '9' ? escapedChars[0] - '0' : ((escapedChars[0] & 0xF) + 9);
+ int low = escapedChars[1] <= '9' ? escapedChars[1] - '0' : ((escapedChars[1] & 0xF) + 9);
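+ // e.g. '7' -> 7; 'B' (0x42) -> (0x42 & 0xF) + 9 = 11; 'f' (0x66) -> (0x66 & 0xF) + 9 = 15.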
byte characterToWrite = (byte)(high * 16 + low);
@@ -100,6 +99,8 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
{
bytes.Write(b);
}
+
+ inputBytes.MoveNext();
}
#if NET8_0_OR_GREATER
diff --git a/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs
index e5123eb47..55413f0d7 100644
--- a/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs
@@ -7,15 +7,7 @@ namespace UglyToad.PdfPig.Tokenization;
internal sealed class NumericTokenizer : ITokenizer
{
- private const byte Zero = 48;
- private const byte Nine = 57;
- private const byte Negative = (byte)'-';
- private const byte Positive = (byte)'+';
- private const byte Period = (byte)'.';
- private const byte ExponentLower = (byte)'e';
- private const byte ExponentUpper = (byte)'E';
-
- public bool ReadsNextByte => true;
+ public bool ReadsNextByte => false;
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken? token)
{
@@ -37,30 +29,50 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken? to
var isExponentNegative = false;
var exponentPart = 0;
- do
+ byte? firstByte = currentByte;
+ bool noRead = true;
+ bool acceptSign = true;
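+ // The caller has already consumed the first byte and passed it in as currentByte.
+ // Every subsequent byte is peeked first and only consumed once it has been accepted,
+ // so whatever terminates the number is left unread (ReadsNextByte is now false).
+ // acceptSign tracks where a '+'/'-' is still legal: at the start and right after an exponent marker.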
+ while (!inputBytes.IsAtEnd() || firstByte is { })
{
- var b = inputBytes.CurrentByte;
- if (b >= Zero && b <= Nine)
+ if (firstByte is { } b)
+ {
+ firstByte = null;
+ }
+ else if (noRead)
+ {
+ noRead = false;
+ b = inputBytes.Peek() ?? 0;
+ }
+ else
{
+ inputBytes.MoveNext();
+ b = inputBytes.Peek() ?? 0;
+ }
+
+ if (b >= '0' && b <= '9')
+ {
+ var value = b - '0';
if (hasExponent)
{
- exponentPart = (exponentPart * 10) + (b - Zero);
+ exponentPart = (exponentPart * 10) + value;
}
else if (hasFraction)
{
- fractionalPart = (fractionalPart * 10) + (b - Zero);
+ fractionalPart = (fractionalPart * 10) + value;
fractionalCount++;
}
else
{
- integerPart = (integerPart * 10) + (b - Zero);
+ integerPart = (integerPart * 10) + value;
}
+ acceptSign = false;
}
- else if (b == Positive)
+ else if (b == '+' && acceptSign)
{
// Has no impact
+ acceptSign = false;
}
- else if (b == Negative)
+ else if (b == '-' && acceptSign)
{
if (hasExponent)
{
@@ -70,30 +82,17 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken? to
{
isNegative = true;
}
+ // acceptSign = false; // Intentionally left commented out: an existing test expects "--21.72" to parse as -21.72.
}
- else if (b == Period)
+ else if (b == '.' && !hasExponent && !hasFraction)
{
- if (hasExponent || hasFraction)
- {
- return false;
- }
-
hasFraction = true;
+ acceptSign = false;
}
- else if (b == ExponentLower || b == ExponentUpper)
+ else if ((b == 'e' || b == 'E') && readBytes > 0 && !hasExponent)
{
- // Don't allow leading exponent.
- if (readBytes == 0)
- {
- return false;
- }
-
- if (hasExponent)
- {
- return false;
- }
-
hasExponent = true;
+ acceptSign = true;
}
else
{
@@ -107,7 +106,7 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken? to
}
readBytes++;
- } while (inputBytes.MoveNext());
+ }
if (hasExponent && !isExponentNegative)
{
diff --git a/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs
index 5019eda8c..c637d7aa2 100644
--- a/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs
@@ -6,7 +6,7 @@
internal sealed class PlainTokenizer : ITokenizer
{
- public bool ReadsNextByte { get; } = true;
+ public bool ReadsNextByte => false;
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
@@ -21,18 +21,11 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
builder.Append((char)currentByte);
- while (inputBytes.MoveNext())
+ while (inputBytes.Peek() is { } b
+ && !ReadHelper.IsWhitespace(b)
+ && (char)b is not '<' and not '[' and not '/' and not ']' and not '>' and not '(' and not ')')
{
- if (ReadHelper.IsWhitespace(inputBytes.CurrentByte))
- {
- break;
- }
-
- if (inputBytes.CurrentByte is (byte)'<' or (byte)'[' or (byte)'/' or (byte)']' or (byte)'>' or (byte)'(' or (byte)')')
- {
- break;
- }
-
+ inputBytes.MoveNext();
builder.Append((char) inputBytes.CurrentByte);
}
diff --git a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs
index 9d74ba031..228862fbd 100644
--- a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs
+++ b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs
@@ -246,7 +246,7 @@ public bool MoveNext()
/*
* Some tokenizers need to read the symbol of the next token to know if they have ended
- * so we don't want to move on to the next byte, we would lose a byte, e.g.: /NameOne/NameTwo or /Name(string)
+ * so we don't want to move on to the next byte; otherwise we would lose a byte, e.g.: /NameOne/NameTwo or /Name(string)
*/
hasBytePreRead = tokenizer.ReadsNextByte;
@@ -317,12 +317,13 @@ private List ReadInlineImageData()
{
// The ID operator should be followed by a single white-space character, and the next character is interpreted
// as the first byte of image data.
- if (!ReadHelper.IsWhitespace(inputBytes.CurrentByte))
+ if (inputBytes.Peek() is { } c
+ && !ReadHelper.IsWhitespace(c))
{
throw new PdfDocumentFormatException($"No whitespace character following the image data (ID) operator. Position: {inputBytes.CurrentOffset}.");
}
- var startsAt = inputBytes.CurrentOffset - 2;
+ var startsAt = inputBytes.CurrentOffset - 1;
return ReadUntilEndImage(startsAt);
}
diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/FileHeaderParser.cs b/src/UglyToad.PdfPig/Parser/FileStructure/FileHeaderParser.cs
index 237515bc1..8775244cb 100644
--- a/src/UglyToad.PdfPig/Parser/FileStructure/FileHeaderParser.cs
+++ b/src/UglyToad.PdfPig/Parser/FileStructure/FileHeaderParser.cs
@@ -80,9 +80,8 @@ private static HeaderVersion GetHeaderVersionAndResetScanner(CommentToken commen
}
var atEnd = scanner.CurrentPosition == scanner.Length;
- var rewind = atEnd ? 1 : 2;
- var commentOffset = scanner.CurrentPosition - comment.Data.Length - rewind;
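+ // The comment tokenizer no longer pre-reads the end-of-line byte, so the scanner now stops
+ // one byte past the comment text in every case; subtract the data length plus one for the leading '%'.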
+ var commentOffset = scanner.CurrentPosition - comment.Data.Length - 1;
scanner.Seek(0);
diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/FirstPassParser.StartXref.cs b/src/UglyToad.PdfPig/Parser/FileStructure/FirstPassParser.StartXref.cs
index 43b24b722..984083236 100644
--- a/src/UglyToad.PdfPig/Parser/FileStructure/FirstPassParser.StartXref.cs
+++ b/src/UglyToad.PdfPig/Parser/FileStructure/FirstPassParser.StartXref.cs
@@ -10,13 +10,56 @@ internal static partial class FirstPassParser
{
private static ReadOnlySpan<byte> StartXRefBytes => "startxref"u8;
+ public const long EndOfFileBufferSize = 1024;
+
public static StartXRefLocation GetFirstCrossReferenceOffset(
IInputBytes bytes,
ISeekableTokenScanner scanner,
ILog log)
{
+ // We used to read backward through the file, but this is quite expensive for streams that directly wrap OS files.
+ // Instead we fetch the last 1024 bytes of the file and do an in-memory search as a cheap first attempt. This is
+ // significantly faster in practice when the file is not already cached in-process.
+ //
+ // If that fails (in practice it never should), we fall back to the old method of reading backwards.
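+ //
+ // A well-formed PDF ends with something like:
+ //   startxref
+ //   116
+ //   %%EOF
+ // so the startxref keyword is almost always found within this final window.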
var fileLength = bytes.Length;
+ {
+ var fetchFrom = Math.Max(bytes.Length - EndOfFileBufferSize, 0L);
+
+ bytes.Seek(fetchFrom);
+
+ Span<byte> byteBuffer = new byte[bytes.Length - fetchFrom]; // TODO: Maybe use ArrayPool?
+
+ int n = bytes.Read(byteBuffer);
+
+ if (n == byteBuffer.Length)
+ {
+ int lx = byteBuffer.LastIndexOf("startxref"u8);
+
+ if (lx < 0)
+ {
+ // As in the old code, also try the mangled "startref" variant.
+ lx = byteBuffer.LastIndexOf("startref"u8);
+ }
+
+ if (lx >= 0)
+ {
+ scanner.Seek(fetchFrom + lx);
+
+ if (scanner.TryReadToken(out OperatorToken startXrefOp) && (startXrefOp.Data == "startxref" || startXrefOp.Data == "startref"))
+ {
+ var pos = GetNumericTokenFollowingCurrent(scanner);
+
+ log.Debug($"Found startxref at {pos}");
+
+ return new StartXRefLocation(fetchFrom + lx, pos);
+ }
+ }
+
+ }
+ }
+ // Fall through to the old backwards-reading code.
var buffer = new CircularByteBuffer(StartXRefBytes.Length);
// Start from the end of the file
diff --git a/src/UglyToad.PdfPig/Parser/Parts/BruteForceSearcher.cs b/src/UglyToad.PdfPig/Parser/Parts/BruteForceSearcher.cs
index 0ecde8965..e26ba0e08 100644
--- a/src/UglyToad.PdfPig/Parser/Parts/BruteForceSearcher.cs
+++ b/src/UglyToad.PdfPig/Parser/Parts/BruteForceSearcher.cs
@@ -57,7 +57,7 @@ public static IReadOnlyDictionary GetObjectLocations(II
{
var next = bytes.Peek();
- if (next.HasValue && next == 'n')
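+ // Peek() returns null at the end of the input; the lifted comparison below is then simply false,
+ // making the explicit HasValue check redundant.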
+ if (next == 'n')
{
if (ReadHelper.IsString(bytes, "endobj"))
{
diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
index 8fbcf4180..c5c9fcd42 100644
--- a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
+++ b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
@@ -465,7 +465,7 @@ private bool TryReadStream(long startStreamTokenOffset, bool getLength, [NotNull
read++;
}
- long streamDataEnd = inputBytes.CurrentOffset + 1;
+ long streamDataEnd = inputBytes.CurrentOffset;
if (possibleEndLocation == null)
return false;