From dd505465596f375ac537a7c72a10e4c2b9715bef Mon Sep 17 00:00:00 2001
From: Bert Huijben <bert@vmoo.com>
Date: Thu, 16 Oct 2025 11:36:49 +0200
Subject: [PATCH] Avoid a lot of seeks by making most tokenizers no longer read
 too far by using seek.

Optimize the FirstPassParser to just fetch a final chunk before doing things char-by-char backwards.
---
 src/UglyToad.PdfPig.Core/ReadHelper.cs        |  7 +-
 src/UglyToad.PdfPig.Core/StreamInputBytes.cs  | 11 +++
 .../Type1/Parser/Type1ArrayTokenizer.cs       |  2 +-
 .../Type1/Parser/Type1FontParser.cs           |  5 ++
 .../Type1/Parser/Type1Tokenizer.cs            | 65 +++++++++++------
 .../Tokenization/NumericTokenizerTests.cs     |  5 +-
 .../ArrayTokenizer.cs                         |  2 +-
 .../CommentTokenizer.cs                       |  7 +-
 .../DictionaryTokenizer.cs                    |  2 +-
 .../EndOfLineTokenizer.cs                     |  2 +-
 .../HexTokenizer.cs                           |  2 +-
 .../NameTokenizer.cs                          | 17 ++---
 .../NumericTokenizer.cs                       | 71 +++++++++----------
 .../PlainTokenizer.cs                         | 17 ++---
 .../Scanner/CoreTokenScanner.cs               |  7 +-
 .../Parser/FileStructure/FileHeaderParser.cs  |  3 +-
 .../FirstPassParser.StartXref.cs              | 43 +++++++++++
 .../Parser/Parts/BruteForceSearcher.cs        |  2 +-
 .../Tokenization/Scanner/PdfTokenScanner.cs   |  2 +-
 19 files changed, 177 insertions(+), 95 deletions(-)
diff --git a/src/UglyToad.PdfPig.Core/ReadHelper.cs b/src/UglyToad.PdfPig.Core/ReadHelper.cs
index 184489fc1..4d6e9842f 100644
--- a/src/UglyToad.PdfPig.Core/ReadHelper.cs
+++ b/src/UglyToad.PdfPig.Core/ReadHelper.cs
@@ -24,12 +24,17 @@ public static class ReadHelper
         /// </summary>
         public const byte AsciiCarriageReturn = 13;
 
+        /// <summary>
+        /// The tab '\t' character.
+        /// </summary>
+        public const byte AsciiTab = 9;
+
         private static readonly HashSet<int> EndOfNameCharacters =
         [
             ' ',
             AsciiCarriageReturn,
             AsciiLineFeed,
-            9,
+            AsciiTab,
             '>',
             '<',
             '[',
diff --git a/src/UglyToad.PdfPig.Core/StreamInputBytes.cs b/src/UglyToad.PdfPig.Core/StreamInputBytes.cs
index d29868741..21f215b8f 100644
--- a/src/UglyToad.PdfPig.Core/StreamInputBytes.cs
+++ b/src/UglyToad.PdfPig.Core/StreamInputBytes.cs
@@ -96,6 +96,17 @@ public bool IsAtEnd()
         /// <inheritdoc />
         public void Seek(long position)
         {
+            var current = CurrentOffset;
+            if (position == current)
+            {
+                return;
+            }
+            else if (peekByte.HasValue && position == current + 1)
+            {
+                MoveNext();
+                return;
+            }
+
             isAtEnd = false;
             peekByte = null;
 
diff --git a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1ArrayTokenizer.cs b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1ArrayTokenizer.cs
index eb709f04f..e1d6e30c5 100644
--- a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1ArrayTokenizer.cs
+++ b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1ArrayTokenizer.cs
@@ -14,7 +14,7 @@ public sealed class Type1ArrayTokenizer : ITokenizer
         /// <inheritdoc />
         public bool ReadsNextByte { get; } = false;
 
-        private static readonly string[] Space = [" "];
+        private static readonly char[] Space = [' '];
 
         /// <inheritdoc />
         public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
diff --git a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
index 606725be1..6b3e26e4a 100644
--- a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
+++ b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
@@ -88,6 +88,11 @@ public static Type1Font Parse(IInputBytes inputBytes, int length1, int length2)
                         {
                             int offset = 0;
 
+                            while (inputBytes.Peek() is { } b && ReadHelper.IsWhitespace(b))
+                            {
+                                inputBytes.MoveNext();
+                            }
+
                             while (inputBytes.MoveNext())
                             {
                                 if (inputBytes.CurrentByte == (byte)ClearToMark[offset])
diff --git a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1Tokenizer.cs b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1Tokenizer.cs
index 333f008ed..aac323f3f 100644
--- a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1Tokenizer.cs
+++ b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1Tokenizer.cs
@@ -2,6 +2,7 @@
 {
     using System;
     using System.Collections.Generic;
+    using System.Diagnostics;
     using System.Globalization;
     using System.Text;
     using Core;
@@ -41,35 +42,43 @@ private Type1Token ReadNextToken()
             do
             {
                 skip = false;
-                while (bytes.MoveNext())
+                while (bytes.Peek() is { } b)
                 {
-                    var b = bytes.CurrentByte;
                     var c = (char)b;
 
                     switch (c)
                     {
                         case '%':
+                            bytes.MoveNext();
                             comments.Add(ReadComment());
                             break;
                         case '(':
+                            bytes.MoveNext();
                             return ReadString();
                         case ')':
                             throw new InvalidOperationException("Encountered an end of string ')' outside of string.");
                         case '[':
+                            bytes.MoveNext();
                             return new Type1Token(c, Type1Token.TokenType.StartArray);
                         case ']':
+                            bytes.MoveNext();
                             return new Type1Token(c, Type1Token.TokenType.EndArray);
                         case '{':
+                            bytes.MoveNext();
                             return new Type1Token(c, Type1Token.TokenType.StartProc);
                         case '}':
+                            bytes.MoveNext();
                             return new Type1Token(c, Type1Token.TokenType.EndProc);
                         case '/':
                             {
-                                var name = ReadLiteral();
+                                bytes.MoveNext();
+                                TryReadLiteral(out var name);
+                                Debug.Assert(name != null);
                                 return new Type1Token(name, Type1Token.TokenType.Literal);
                             }
                         case '<':
                             {
+                                bytes.MoveNext();
                                 var following = bytes.Peek();
                                 if (following == '<')
                                 {
@@ -81,6 +90,7 @@ private Type1Token ReadNextToken()
                             }
                         case '>':
                             {
+                                bytes.MoveNext();
                                 var following = bytes.Peek();
                                 if (following == '>')
                                 {
@@ -94,23 +104,24 @@ private Type1Token ReadNextToken()
                             {
                                 if (ReadHelper.IsWhitespace(b))
                                 {
+                                    bytes.MoveNext();
                                     skip = true;
                                     break;
                                 }
 
                                 if (b == 0)
                                 {
+                                    bytes.MoveNext();
                                     skip = true;
                                     break;
                                 }
 
-                                if (TryReadNumber(c, out var number))
+                                if (TryReadNumber(out var number))
                                 {
                                     return number;
                                 }
 
-                                var name = ReadLiteral(c);
-                                if (name == null)
+                                if (!TryReadLiteral(out var name))
                                 {
                                     throw new InvalidOperationException($"The binary portion of the type 1 font was invalid at position {bytes.CurrentOffset}.");
                                 }
@@ -197,12 +208,21 @@ char GetNext()
             return null;
         }
 
-        private bool TryReadNumber(char c, out Type1Token numberToken)
+        private bool TryReadNumber(out Type1Token numberToken)
         {
             char GetNext()
             {
                 bytes.MoveNext();
-                return (char)bytes.CurrentByte;
+                return (char)(bytes.Peek() ?? 0);
+            }
+
+            char c = (char)(bytes.Peek() ?? 0);
+
+            if (!((c >= '0' && c <= '9') || c is '+' or '-'))
+            {
+                // Easy out. Not a valid number
+                numberToken = null;
+                return false;
             }
 
             numberToken = null;
@@ -251,8 +271,6 @@ char GetNext()
             else
             {
                 // integer
-                bytes.Seek(bytes.CurrentOffset - 1);
-
                 numberToken = new Type1Token(sb.ToString(), Type1Token.TokenType.Integer);
                 return true;
             }
@@ -309,7 +327,6 @@ char GetNext()
                 }
             }
 
-            bytes.Seek(bytes.CurrentOffset - 1);
             if (radix != null)
             {
                 var number = Convert.ToInt32(sb.ToString(), int.Parse(radix.ToString(), CultureInfo.InvariantCulture));
@@ -323,14 +340,9 @@ char GetNext()
             return true;
         }
 
-        private string ReadLiteral(char? previousCharacter = null)
+        private bool TryReadLiteral(out string? value)
         {
             literalBuffer.Clear();
-            if (previousCharacter.HasValue)
-            {
-                literalBuffer.Append(previousCharacter);
-            }
-
             do
             {
                 var b = bytes.Peek();
@@ -350,8 +362,16 @@ private string ReadLiteral(char? previousCharacter = null)
                 literalBuffer.Append(c);
             } while (bytes.MoveNext());
 
-            var literal = literalBuffer.ToString();
-            return literal.Length == 0 ? null : literal;
+            if (literalBuffer.Length > 0)
+            {
+                value = literalBuffer.ToString();
+                return true;
+            }
+            else
+            {
+                value = null;
+                return false;
+            }
         }
 
         private string ReadComment()
@@ -375,9 +395,10 @@ private string ReadComment()
         private Type1DataToken ReadCharString(int length)
         {
             // Skip preceding space.
-            bytes.MoveNext();
-            // TODO: may be wrong
-           // bytes.MoveNext();
+            if (bytes.Peek() is { } ws && ReadHelper.IsWhitespace(ws))
+            {
+                bytes.MoveNext();
+            }
 
             byte[] data = new byte[length];
             for (int i = 0; i < length; i++)
diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/NumericTokenizerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/NumericTokenizerTests.cs
index be8a19a25..fa9654ff9 100644
--- a/src/UglyToad.PdfPig.Tests/Tokenization/NumericTokenizerTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Tokenization/NumericTokenizerTests.cs
@@ -91,7 +91,10 @@ public void OnlyParsesNumberPart()
             Assert.True(result);
             Assert.Equal(135.6654, AssertNumericToken(token).Data);
 
-            Assert.Equal('/', (char)input.Bytes.CurrentByte);
+            if (tokenizer.ReadsNextByte)
+                Assert.Equal('/', (char)input.Bytes.CurrentByte);
+            else
+                Assert.Equal('4', (char)input.Bytes.CurrentByte);
         }
 
         [Fact]
diff --git a/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs
index 8c95e41ff..da1f468dd 100644
--- a/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs
@@ -9,7 +9,7 @@ internal sealed class ArrayTokenizer : ITokenizer
     {
         private readonly bool usePdfDocEncoding;
 
-        public bool ReadsNextByte { get; } = false;
+        public bool ReadsNextByte => false;
 
         public ArrayTokenizer(bool usePdfDocEncoding)
         {
diff --git a/src/UglyToad.PdfPig.Tokenization/CommentTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/CommentTokenizer.cs
index cd51a0e23..25c588952 100644
--- a/src/UglyToad.PdfPig.Tokenization/CommentTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/CommentTokenizer.cs
@@ -6,7 +6,7 @@
 
     internal sealed class CommentTokenizer : ITokenizer
     {
-        public bool ReadsNextByte { get; } = true;
+        public bool ReadsNextByte => false;
 
         public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
         {
@@ -17,10 +17,11 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
                 return false;
             }
 
-            using var builder = new ValueStringBuilder();
+            using var builder = new ValueStringBuilder(stackalloc char[32]);
 
-            while (inputBytes.MoveNext() && !ReadHelper.IsEndOfLine(inputBytes.CurrentByte))
+            while (inputBytes.Peek() is { } c && !ReadHelper.IsEndOfLine(c))
             {
+                inputBytes.MoveNext();
                 builder.Append((char) inputBytes.CurrentByte);
             }
 
diff --git a/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs
index 213fdcfc2..3b75ffcc6 100644
--- a/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs
@@ -11,7 +11,7 @@ internal class DictionaryTokenizer : ITokenizer
         private readonly IReadOnlyList<NameToken> requiredKeys;
         private readonly bool useLenientParsing;
 
-        public bool ReadsNextByte { get; } = false;
+        public bool ReadsNextByte => false;
 
         /// <summary>
         /// Create a new <see cref="DictionaryTokenizer"/>.
diff --git a/src/UglyToad.PdfPig.Tokenization/EndOfLineTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/EndOfLineTokenizer.cs
index 7fb7d2c7a..66597f646 100644
--- a/src/UglyToad.PdfPig.Tokenization/EndOfLineTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/EndOfLineTokenizer.cs
@@ -9,7 +9,7 @@
     public sealed class EndOfLineTokenizer : ITokenizer
     {
         /// <inheritdoc />
-        public bool ReadsNextByte { get; } = false;
+        public bool ReadsNextByte => false;
 
         /// <inheritdoc />
         public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
diff --git a/src/UglyToad.PdfPig.Tokenization/HexTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/HexTokenizer.cs
index 72fe06772..92bc16309 100644
--- a/src/UglyToad.PdfPig.Tokenization/HexTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/HexTokenizer.cs
@@ -5,7 +5,7 @@
 
     internal sealed class HexTokenizer : ITokenizer
     {
-        public bool ReadsNextByte { get; } = false;
+        public bool ReadsNextByte => false;
 
         public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
         {
diff --git a/src/UglyToad.PdfPig.Tokenization/NameTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/NameTokenizer.cs
index 1aa624471..16974167a 100644
--- a/src/UglyToad.PdfPig.Tokenization/NameTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/NameTokenizer.cs
@@ -11,14 +11,14 @@
 
     internal sealed class NameTokenizer : ITokenizer
     {
+#if NET
         static NameTokenizer()
         {
-#if NET
             Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
-#endif
         }
+#endif
 
-        public bool ReadsNextByte { get; } = true;
+        public bool ReadsNextByte => false;
 
         public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
         {
@@ -35,10 +35,8 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
             int postEscapeRead = 0;
             Span<char> escapedChars = stackalloc char[2];
 
-            while (inputBytes.MoveNext())
+            while (inputBytes.Peek() is { } b)
             {
-                var b = inputBytes.CurrentByte;
-
                 if (b == '#')
                 {
                     escapeActive = true;
@@ -52,8 +50,9 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
 
                         if (postEscapeRead == 2)
                         {
-                            int high = escapedChars[0] <= '9' ? escapedChars[0] - '0' : char.ToUpper(escapedChars[0]) - 'A' + 10;
-                            int low = escapedChars[1] <= '9' ? escapedChars[1] - '0' : char.ToUpper(escapedChars[1]) - 'A' + 10;
+                            // We validated that the char is hex. So assume ASCII rules apply and shortcut hex decoding
+                            int high = escapedChars[0] <= '9' ? escapedChars[0] - '0' : ((escapedChars[0] & 0xF) + 9);
+                            int low = escapedChars[1] <= '9' ? escapedChars[1] - '0' : ((escapedChars[1] & 0xF) + 9);
 
                             byte characterToWrite = (byte)(high * 16 + low);
 
@@ -100,6 +99,8 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
                 {
                     bytes.Write(b);
                 }
+
+                inputBytes.MoveNext();
             }
 
 #if NET8_0_OR_GREATER
diff --git a/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs
index e5123eb47..55413f0d7 100644
--- a/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/NumericTokenizer.cs
@@ -7,15 +7,7 @@ namespace UglyToad.PdfPig.Tokenization;
 
 internal sealed class NumericTokenizer : ITokenizer
 {
-    private const byte Zero = 48;
-    private const byte Nine = 57;
-    private const byte Negative = (byte)'-';
-    private const byte Positive = (byte)'+';
-    private const byte Period = (byte)'.';
-    private const byte ExponentLower = (byte)'e';
-    private const byte ExponentUpper = (byte)'E';
-
-    public bool ReadsNextByte => true;
+    public bool ReadsNextByte => false;
 
     public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken? token)
     {
@@ -37,30 +29,50 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken? to
         var isExponentNegative = false;
         var exponentPart = 0;
 
-        do
+        byte? firstByte = currentByte;
+        bool noRead = true;
+        bool acceptSign = true;
+        while (!inputBytes.IsAtEnd() || firstByte is { })
         {
-            var b = inputBytes.CurrentByte;
-            if (b >= Zero && b <= Nine)
+            if (firstByte is { } b)
+            {
+                firstByte = null;
+            }
+            else if (noRead)
+            {
+                noRead = false;
+                b = inputBytes.Peek() ?? 0;
+            }
+            else
             {
+                inputBytes.MoveNext();
+                b = inputBytes.Peek() ?? 0;
+            }
+
+            if (b >= '0' && b <= '9')
+            {
+                var value = b - '0';
                 if (hasExponent)
                 {
-                    exponentPart = (exponentPart * 10) + (b - Zero);
+                    exponentPart = (exponentPart * 10) + value;
                 }
                 else if (hasFraction)
                 {
-                    fractionalPart = (fractionalPart * 10) + (b - Zero);
+                    fractionalPart = (fractionalPart * 10) + value;
                     fractionalCount++;
                 }
                 else
                 {
-                    integerPart = (integerPart * 10) + (b - Zero);
+                    integerPart = (integerPart * 10) + value;
                 }
+                acceptSign = false;
             }
-            else if (b == Positive)
+            else if (b == '+' && acceptSign)
             {
                 // Has no impact
+                acceptSign = false;
             }
-            else if (b == Negative)
+            else if (b == '-' && acceptSign)
             {
                 if (hasExponent)
                 {
@@ -70,30 +82,17 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken? to
                 {
                     isNegative = true;
                 }
+                // acceptSign = false; // Somehow we have a test that expects to support "--21.72" to return -21.72
             }
-            else if (b == Period)
+            else if (b == '.' && !hasExponent && !hasFraction)
             {
-                if (hasExponent || hasFraction)
-                {
-                    return false;
-                }
-
                 hasFraction = true;
+                acceptSign = false;
             }
-            else if (b == ExponentLower || b == ExponentUpper)
+            else if ((b == 'e' || b == 'E') && readBytes > 0 && !hasExponent)
             {
-                // Don't allow leading exponent.
-                if (readBytes == 0)
-                {
-                    return false;
-                }
-
-                if (hasExponent)
-                {
-                    return false;
-                }
-
                 hasExponent = true;
+                acceptSign = true;
             }
             else
             {
@@ -107,7 +106,7 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken? to
             }
 
             readBytes++;
-        } while (inputBytes.MoveNext());
+        }
 
         if (hasExponent && !isExponentNegative)
         {
diff --git a/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs
index 5019eda8c..c637d7aa2 100644
--- a/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/PlainTokenizer.cs
@@ -6,7 +6,7 @@
 
     internal sealed class PlainTokenizer : ITokenizer
     {
-        public bool ReadsNextByte { get; } = true;
+        public bool ReadsNextByte => false;
 
         public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
         {
@@ -21,18 +21,11 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
 
             builder.Append((char)currentByte);
             
-            while (inputBytes.MoveNext())
+            while (inputBytes.Peek() is { } b
+                && !ReadHelper.IsWhitespace(b)
+                && (char)b is not '<' and not '[' and not '/' and not ']' and not '>' and not '(' and not ')')
             {
-                if (ReadHelper.IsWhitespace(inputBytes.CurrentByte))
-                {
-                    break;
-                }
-
-                if (inputBytes.CurrentByte is (byte)'<' or (byte)'[' or (byte)'/' or (byte)']' or (byte)'>' or (byte)'(' or (byte)')')
-                {
-                    break;
-                }
-
+                inputBytes.MoveNext();
                 builder.Append((char) inputBytes.CurrentByte);
             }
 
diff --git a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs
index 9d74ba031..228862fbd 100644
--- a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs
+++ b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs
@@ -246,7 +246,7 @@ public bool MoveNext()
 
                 /* 
                  * Some tokenizers need to read the symbol of the next token to know if they have ended
-                 * so we don't want to move on to the next byte, we would lose a byte, e.g.: /NameOne/NameTwo or /Name(string)                
+                 * so we don't want to move on to the next byte, we would lose a byte, e.g.: /NameOne/NameTwo or /Name(string)
                  */
                 hasBytePreRead = tokenizer.ReadsNextByte;
 
@@ -317,12 +317,13 @@ private List<byte> ReadInlineImageData()
         {
             // The ID operator should be followed by a single white-space character, and the next character is interpreted
             // as the first byte of image data. 
-            if (!ReadHelper.IsWhitespace(inputBytes.CurrentByte))
+            if (inputBytes.Peek() is { } c
+                && !ReadHelper.IsWhitespace(c))
             {
                 throw new PdfDocumentFormatException($"No whitespace character following the image data (ID) operator. Position: {inputBytes.CurrentOffset}.");
             }
 
-            var startsAt = inputBytes.CurrentOffset - 2;
+            var startsAt = inputBytes.CurrentOffset - 1;
 
             return ReadUntilEndImage(startsAt);
         }
diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/FileHeaderParser.cs b/src/UglyToad.PdfPig/Parser/FileStructure/FileHeaderParser.cs
index 237515bc1..8775244cb 100644
--- a/src/UglyToad.PdfPig/Parser/FileStructure/FileHeaderParser.cs
+++ b/src/UglyToad.PdfPig/Parser/FileStructure/FileHeaderParser.cs
@@ -80,9 +80,8 @@ private static HeaderVersion GetHeaderVersionAndResetScanner(CommentToken commen
             }
 
             var atEnd = scanner.CurrentPosition == scanner.Length;
-            var rewind = atEnd ? 1 : 2;
 
-            var commentOffset = scanner.CurrentPosition - comment.Data.Length - rewind;
+            var commentOffset = scanner.CurrentPosition - comment.Data.Length - 1;
 
             scanner.Seek(0);
 
diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/FirstPassParser.StartXref.cs b/src/UglyToad.PdfPig/Parser/FileStructure/FirstPassParser.StartXref.cs
index 43b24b722..984083236 100644
--- a/src/UglyToad.PdfPig/Parser/FileStructure/FirstPassParser.StartXref.cs
+++ b/src/UglyToad.PdfPig/Parser/FileStructure/FirstPassParser.StartXref.cs
@@ -10,13 +10,56 @@ internal static partial class FirstPassParser
 {
     private static ReadOnlySpan<byte> StartXRefBytes => "startxref"u8;
 
+    public const long EndOfFileBufferSize = 1024;
+
     public static StartXRefLocation GetFirstCrossReferenceOffset(
         IInputBytes bytes,
         ISeekableTokenScanner scanner,
         ILog log)
     {
+        // We used to read backward through the file, but this is quite expensive for streams that directly wrap OS files.
+        // Instead we fetch the last 1024 bytes of the file and do an in-memory search as a cheap first attempt. This is
+        // significantly faster in practice when there is no in-process caching of the file involved.
+        // 
+        // If that fails (in practice it never should) we fall back to the old method of reading backwards.
         var fileLength = bytes.Length;
+        {
+            var fetchFrom = Math.Max(bytes.Length - EndOfFileBufferSize, 0L);
+
+            bytes.Seek(fetchFrom);
+
+            Span<byte> byteBuffer = new byte[bytes.Length - fetchFrom];   // TODO: Maybe use PoolArray?
+
+            int n = bytes.Read(byteBuffer);
+
+            if (n == byteBuffer.Length)
+            {
+                int lx = byteBuffer.LastIndexOf("startxref"u8);
+
+                if (lx < 0)
+                {
+                    // See old code. We also try a mangled version
+                    lx = byteBuffer.LastIndexOf("startref"u8);
+                }
+
+                if (lx >= 0)
+                {
+                    scanner.Seek(fetchFrom + lx);
+
+                    if (scanner.TryReadToken(out OperatorToken startXrefOp) && (startXrefOp.Data == "startxref" || startXrefOp.Data == "startref"))
+                    {
+                        var pos = GetNumericTokenFollowingCurrent(scanner);
+
+                        log.Debug($"Found startxref at {pos}");
+
+                        return new StartXRefLocation(fetchFrom + lx, pos);
+                    }
+                }
+
+            }
+        }
 
+        // Now fall through to the old code
         var buffer = new CircularByteBuffer(StartXRefBytes.Length);
 
         // Start from the end of the file
diff --git a/src/UglyToad.PdfPig/Parser/Parts/BruteForceSearcher.cs b/src/UglyToad.PdfPig/Parser/Parts/BruteForceSearcher.cs
index 0ecde8965..e26ba0e08 100644
--- a/src/UglyToad.PdfPig/Parser/Parts/BruteForceSearcher.cs
+++ b/src/UglyToad.PdfPig/Parser/Parts/BruteForceSearcher.cs
@@ -57,7 +57,7 @@ public static IReadOnlyDictionary<IndirectReference, long> GetObjectLocations(II
                     {
                         var next = bytes.Peek();
 
-                        if (next.HasValue && next == 'n')
+                        if (next == 'n')
                         {
                             if (ReadHelper.IsString(bytes, "endobj"))
                             {
diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
index 8fbcf4180..c5c9fcd42 100644
--- a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
+++ b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
@@ -465,7 +465,7 @@ private bool TryReadStream(long startStreamTokenOffset, bool getLength, [NotNull
                 read++;
             }
 
-            long streamDataEnd = inputBytes.CurrentOffset + 1;
+            long streamDataEnd = inputBytes.CurrentOffset;
 
             if (possibleEndLocation == null)
                 return false;