diff --git a/src/UglyToad.PdfPig.Core/StackDepthGuard.cs b/src/UglyToad.PdfPig.Core/StackDepthGuard.cs
new file mode 100644
index 000000000..b116bdf0b
--- /dev/null
+++ b/src/UglyToad.PdfPig.Core/StackDepthGuard.cs
@@ -0,0 +1,63 @@
+namespace UglyToad.PdfPig.Core
+{
+ /// <summary>
+ /// Provides a guard for tracking and limiting the depth of nested stack operations, such as recursive calls or
+ /// nested parsing.
+ /// </summary>
+ /// <remarks>Use this class to prevent excessive stack usage by enforcing a maximum nesting depth. This is
+ /// particularly useful in scenarios where untrusted or deeply nested input could cause stack overflows or
+ /// performance issues. An instance keeps one mutable depth counter and performs no synchronization.</remarks>
+ public sealed class StackDepthGuard
+ {
+ /// <summary>
+ /// Represents a stack depth guard with no effective limit on the allowed depth.
+ /// </summary>
+ /// <remarks>Use this instance when stack depth restrictions are not required. Its limit is <c>int.MaxValue</c>, so the check in <see cref="Enter"/> can never fail. The instance is shared by every caller and its counter is still updated on each call.</remarks>
+ public static readonly StackDepthGuard Infinite = new StackDepthGuard(int.MaxValue);
+
+ private readonly int maxStackDepth; // Inclusive upper bound for depth; validated as positive by the constructor.
+
+ private int depth; // Current nesting level: raised by Enter, lowered by Exit, never left below zero.
+
+ /// <summary>Initializes a new instance of the <see cref="StackDepthGuard"/> class with the specified maximum stack depth.</summary>
+ /// <param name="maxStackDepth">The maximum allowed stack depth for guarded operations. Must be a positive integer.</param>
+ /// <exception cref="System.ArgumentOutOfRangeException">
+ /// Thrown when <paramref name="maxStackDepth"/> is zero or negative.</exception>
+ public StackDepthGuard(int maxStackDepth)
+ {
+ if (maxStackDepth <= 0)
+ {
+ throw new ArgumentOutOfRangeException(nameof(maxStackDepth));
+ }
+ this.maxStackDepth = maxStackDepth;
+ }
+
+ /// <summary>
+ /// Increments the current nesting depth and checks against the maximum allowed stack depth.
+ /// </summary>
+ /// <exception cref="PdfDocumentFormatException">Thrown if the maximum allowed nesting depth is exceeded.</exception>
+ public void Enter()
+ {
+ if (++depth > maxStackDepth)
+ {
+ depth--; // Undo the increment before throwing, so a caller that catches this needs no matching Exit.
+ throw new PdfDocumentFormatException($"Exceeded maximum nesting depth of {maxStackDepth}.");
+ }
+ }
+
+ /// <summary>
+ /// Decreases the current depth level by one, ensuring that the depth does not become negative.
+ /// </summary>
+ /// <remarks>If the current depth is already zero, calling this method has no effect. This method
+ /// is typically used to track or manage nested operations or scopes where depth must remain
+ /// non-negative.</remarks>
+ public void Exit()
+ {
+ depth--;
+ if (depth < 0)
+ {
+ depth = 0;
+ }
+ }
+ }
+}
diff --git a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
index bd64e98d9..1b14a8c66 100644
--- a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
+++ b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
@@ -21,15 +21,16 @@ public static class Type1FontParser
private static readonly char[] Separators = [' '];
private static readonly Type1EncryptedPortionParser EncryptedPortionParser = new Type1EncryptedPortionParser();
-
+
///
/// Parses an embedded Adobe Type 1 font file.
///
/// The bytes of the font program.
/// The length in bytes of the clear text portion of the font program.
/// The length in bytes of the encrypted portion of the font program.
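+ /// <param name="stackDepthGuard">The guard passed to the token scanner to limit nesting depth while parsing the font program.</param>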
/// The parsed type 1 font.
- public static Type1Font Parse(IInputBytes inputBytes, int length1, int length2)
+ public static Type1Font Parse(IInputBytes inputBytes, int length1, int length2, StackDepthGuard stackDepthGuard)
{
// Sometimes the entire PFB file including the header bytes can be included which prevents parsing in the normal way.
var isEntirePfbFile = inputBytes.Peek() == PfbFileIndicator;
@@ -44,7 +45,7 @@ public static Type1Font Parse(IInputBytes inputBytes, int length1, int length2)
inputBytes = new MemoryInputBytes(ascii);
}
- var scanner = new CoreTokenScanner(inputBytes, false);
+ var scanner = new CoreTokenScanner(inputBytes, false, stackDepthGuard);
if (!scanner.TryReadToken(out CommentToken comment) || !comment.Data.StartsWith("!"))
{
diff --git a/src/UglyToad.PdfPig.Tests/Fonts/Cmap/CodespaceRangeTests.cs b/src/UglyToad.PdfPig.Tests/Fonts/Cmap/CodespaceRangeTests.cs
index dbe6611e7..39193a053 100644
--- a/src/UglyToad.PdfPig.Tests/Fonts/Cmap/CodespaceRangeTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Fonts/Cmap/CodespaceRangeTests.cs
@@ -105,7 +105,7 @@ public void ColorspaceParserError()
{
var parser = new CodespaceRangeParser();
var byteArrayInput = new MemoryInputBytes(OtherEncodings.StringAsLatin1Bytes("1 begincodespacerange\nendcodespacerange"));
- var tokenScanner = new CoreTokenScanner(byteArrayInput, false);
+ var tokenScanner = new CoreTokenScanner(byteArrayInput, false, new StackDepthGuard(256));
Assert.True(tokenScanner.MoveNext());
Assert.True(tokenScanner.CurrentToken is NumericToken);
diff --git a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs
index d7be60f71..ba2ab9cd7 100644
--- a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs
@@ -11,7 +11,7 @@ public void CanReadHexEncryptedPortion()
{
var bytes = GetFileBytes("AdobeUtopia.pfa");
- Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0);
+ Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256));
}
[Fact]
@@ -20,7 +20,7 @@ public void CanReadBinaryEncryptedPortionOfFullPfb()
// TODO: support reading in these pfb files
var bytes = GetFileBytes("Raleway-Black.pfb");
- Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0);
+ Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256));
}
[Fact]
@@ -28,7 +28,7 @@ public void CanReadCharStrings()
{
var bytes = GetFileBytes("CMBX10.pfa");
- Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0);
+ Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256));
}
[Fact]
@@ -36,7 +36,7 @@ public void CanReadEncryptedPortion()
{
var bytes = GetFileBytes("CMCSC10");
- Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0);
+ Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256));
}
[Fact]
@@ -44,7 +44,7 @@ public void CanReadAsciiPart()
{
var bytes = GetFileBytes("CMBX12");
- Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0);
+ Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256));
}
[Fact]
@@ -52,7 +52,7 @@ public void OutputCmbx10Svgs()
{
var bytes = GetFileBytes("CMBX10");
- var result = Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0);
+ var result = Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256));
var builder = new StringBuilder("
");
foreach (var charString in result.CharStrings.CharStrings)
@@ -71,7 +71,7 @@ public void CanReadFontWithCommentsInOtherSubrs()
{
var bytes = GetFileBytes("CMR10");
- Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0);
+ Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256));
}
private static byte[] GetFileBytes(string name)
diff --git a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs
index 25ae26a63..8d2a48bea 100644
--- a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs
@@ -11,6 +11,20 @@
public class GithubIssuesTests
{
+ [Fact]
+ public void Issue1217()
+ {
+ var path = IntegrationHelpers.GetSpecificTestDocumentPath("stackoverflow_error.pdf");
+ // Open with a nesting limit of 100 and expect the depth guard's exception, whose message reports that limit.
+ var options = new ParsingOptions()
+ {
+ UseLenientParsing = true,
+ MaxStackDepth = 100
+ };
+ var ex = Assert.Throws<PdfDocumentFormatException>(() => PdfDocument.Open(path, options));
+ Assert.Equal($"Exceeded maximum nesting depth of {options.MaxStackDepth}.", ex.Message);
+ }
+
[Fact]
public void Issue1223()
{
diff --git a/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/stackoverflow_error.pdf b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/stackoverflow_error.pdf
new file mode 100644
index 000000000..f20a87ab3
Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/stackoverflow_error.pdf differ
diff --git a/src/UglyToad.PdfPig.Tests/Parser/FileStructure/FirstPassParserTests.cs b/src/UglyToad.PdfPig.Tests/Parser/FileStructure/FirstPassParserTests.cs
index f4ab1c5e8..d47cec01d 100644
--- a/src/UglyToad.PdfPig.Tests/Parser/FileStructure/FirstPassParserTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Parser/FileStructure/FirstPassParserTests.cs
@@ -42,7 +42,7 @@ 0000000576 00000 n
var results = FirstPassParser.Parse(
new FileHeaderOffset(0),
ib.Bytes,
- new CoreTokenScanner(ib.Bytes, true));
+ new CoreTokenScanner(ib.Bytes, true, new StackDepthGuard(256)));
Assert.Equal(2, results.Parts.Count);
Assert.NotNull(results.Trailer);
@@ -114,7 +114,7 @@ 0000004385 00000 n
var ib = StringBytesTestConverter.Convert(content, false);
- var results = FirstPassParser.Parse(new FileHeaderOffset(0), ib.Bytes, new CoreTokenScanner(ib.Bytes, true));
+ var results = FirstPassParser.Parse(new FileHeaderOffset(0), ib.Bytes, new CoreTokenScanner(ib.Bytes, true, new StackDepthGuard(256)));
var offsets = results.Parts.Select(x => x.Offset).OrderBy(x => x).ToList();
@@ -123,7 +123,7 @@ 0000004385 00000 n
Assert.NotNull(results.Trailer);
ib.Bytes.Seek(98);
- var scanner = new CoreTokenScanner(ib.Bytes, false);
+ var scanner = new CoreTokenScanner(ib.Bytes, false, new StackDepthGuard(256));
scanner.MoveNext();
Assert.Equal(scanner.CurrentToken, OperatorToken.Xref);
}
diff --git a/src/UglyToad.PdfPig.Tests/Parser/PageContentParserTests.cs b/src/UglyToad.PdfPig.Tests/Parser/PageContentParserTests.cs
index 4ced371b2..b88d169e4 100644
--- a/src/UglyToad.PdfPig.Tests/Parser/PageContentParserTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Parser/PageContentParserTests.cs
@@ -15,7 +15,7 @@
public class PageContentParserTests
{
- private readonly PageContentParser parser = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance);
+ private readonly PageContentParser parser = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, new StackDepthGuard(256));
private readonly ILog log = new NoOpLog();
[Fact]
@@ -210,7 +210,7 @@ public void CorrectlyHandlesFile0007511CorruptInlineImage()
var content = File.ReadAllText(path);
var input = StringBytesTestConverter.Convert(content, false);
- var lenientParser = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, true);
+ var lenientParser = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, new StackDepthGuard(256), true);
var result = lenientParser.Parse(1, input.Bytes, log);
Assert.NotEmpty(result);
diff --git a/src/UglyToad.PdfPig.Tests/Parser/Parts/FileHeaderParserTests.cs b/src/UglyToad.PdfPig.Tests/Parser/Parts/FileHeaderParserTests.cs
index a0de77930..2d2ed5f79 100644
--- a/src/UglyToad.PdfPig.Tests/Parser/Parts/FileHeaderParserTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Parser/Parts/FileHeaderParserTests.cs
@@ -149,7 +149,7 @@ public void Issue334()
var bytes = new MemoryInputBytes(input);
- var scanner = new CoreTokenScanner(bytes, true, ScannerScope.None);
+ var scanner = new CoreTokenScanner(bytes, true, new StackDepthGuard(256), ScannerScope.None);
var result = FileHeaderParser.Parse(scanner, bytes, false, log);
diff --git a/src/UglyToad.PdfPig.Tests/Parser/Parts/FileTrailerParserTests.cs b/src/UglyToad.PdfPig.Tests/Parser/Parts/FileTrailerParserTests.cs
index b0e8ee4ae..505105381 100644
--- a/src/UglyToad.PdfPig.Tests/Parser/Parts/FileTrailerParserTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Parser/Parts/FileTrailerParserTests.cs
@@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Tests.Parser.Parts;
+using PdfPig.Core;
using PdfPig.Parser.FileStructure;
using PdfPig.Tokenization.Scanner;
@@ -26,7 +27,7 @@ 12 0 obj
var result = FirstPassParser.GetFirstCrossReferenceOffset(
input.Bytes,
- new CoreTokenScanner(input.Bytes, true),
+ new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)),
new TestingLog());
Assert.Equal(456, result.StartXRefDeclaredOffset);
@@ -59,7 +60,7 @@ 12 0 obj
var result = FirstPassParser.GetFirstCrossReferenceOffset(
input.Bytes,
- new CoreTokenScanner(input.Bytes, true),
+ new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)),
new TestingLog());
Assert.Equal(17, result.StartXRefDeclaredOffset);
@@ -93,7 +94,7 @@ 12 0 obj
var result = FirstPassParser.GetFirstCrossReferenceOffset(
input.Bytes,
- new CoreTokenScanner(input.Bytes, true),
+ new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)),
new TestingLog());
Assert.Equal(1384733, result.StartXRefDeclaredOffset);
@@ -106,7 +107,7 @@ public void BadInputBytesReturnsNull()
var result = FirstPassParser.GetFirstCrossReferenceOffset(
input.Bytes,
- new CoreTokenScanner(input.Bytes, true),
+ new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)),
new TestingLog());
Assert.Null(result.StartXRefDeclaredOffset);
@@ -130,7 +131,7 @@ 11 0 obj
var result = FirstPassParser.GetFirstCrossReferenceOffset(
input.Bytes,
- new CoreTokenScanner(input.Bytes, true),
+ new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)),
new TestingLog());
Assert.Null(result.StartXRefDeclaredOffset);
@@ -151,7 +152,7 @@ 1 0 obj
var result = FirstPassParser.GetFirstCrossReferenceOffset(
input.Bytes,
- new CoreTokenScanner(input.Bytes, true),
+ new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)),
new TestingLog());
Assert.Null(result.StartXRefDeclaredOffset);
@@ -185,7 +186,7 @@ 12 0 obj
var result = FirstPassParser.GetFirstCrossReferenceOffset(
input.Bytes,
- new CoreTokenScanner(input.Bytes, true),
+ new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)),
new TestingLog());
Assert.Equal(1274665676543, result.StartXRefDeclaredOffset);
@@ -207,7 +208,7 @@ public void CanReadStartXrefIfCommentsPresent()
var result = FirstPassParser.GetFirstCrossReferenceOffset(
input.Bytes,
- new CoreTokenScanner(input.Bytes, true),
+ new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)),
new TestingLog());
Assert.Equal(57695, result.StartXRefDeclaredOffset);
diff --git a/src/UglyToad.PdfPig.Tests/StringBytesTestConverter.cs b/src/UglyToad.PdfPig.Tests/StringBytesTestConverter.cs
index d91647d49..537f133ce 100644
--- a/src/UglyToad.PdfPig.Tests/StringBytesTestConverter.cs
+++ b/src/UglyToad.PdfPig.Tests/StringBytesTestConverter.cs
@@ -34,7 +34,7 @@ public class Result
internal static (CoreTokenScanner scanner, IInputBytes bytes) Scanner(string s)
{
var inputBytes = new MemoryInputBytes(OtherEncodings.StringAsLatin1Bytes(s));
- var result = new CoreTokenScanner(inputBytes, true);
+ var result = new CoreTokenScanner(inputBytes, true, new StackDepthGuard(256));
return (result, inputBytes);
}
diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/ArrayTokenizerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/ArrayTokenizerTests.cs
index 7c0d2d81e..93478cff4 100644
--- a/src/UglyToad.PdfPig.Tests/Tokenization/ArrayTokenizerTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Tokenization/ArrayTokenizerTests.cs
@@ -1,11 +1,12 @@
namespace UglyToad.PdfPig.Tests.Tokenization
{
+ using PdfPig.Core;
using PdfPig.Tokenization;
using PdfPig.Tokens;
public class ArrayTokenizerTests
{
- private readonly ArrayTokenizer tokenizer = new ArrayTokenizer(true);
+ private readonly ArrayTokenizer tokenizer = new ArrayTokenizer(true, new StackDepthGuard(256));
[Theory]
[InlineData("]")]
diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/DictionaryTokenizerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/DictionaryTokenizerTests.cs
index 97031fe41..882f84d21 100644
--- a/src/UglyToad.PdfPig.Tests/Tokenization/DictionaryTokenizerTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Tokenization/DictionaryTokenizerTests.cs
@@ -7,7 +7,7 @@ namespace UglyToad.PdfPig.Tests.Tokenization
public class DictionaryTokenizerTests
{
- private readonly DictionaryTokenizer tokenizer = new DictionaryTokenizer(true);
+ private readonly DictionaryTokenizer tokenizer = new DictionaryTokenizer(true, new StackDepthGuard(256));
[Theory]
[InlineData("[rjee]")]
diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/CoreTokenScannerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/CoreTokenScannerTests.cs
index e4eb551a6..e36189097 100644
--- a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/CoreTokenScannerTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/CoreTokenScannerTests.cs
@@ -11,7 +11,7 @@ public class CoreTokenScannerTests
public CoreTokenScannerTests()
{
- scannerFactory = x => new CoreTokenScanner(x, true);
+ scannerFactory = x => new CoreTokenScanner(x, true, new StackDepthGuard(256));
}
[Fact]
@@ -231,7 +231,7 @@ public void SkipsCommentsInStreams()
var scanner = new CoreTokenScanner(
StringBytesTestConverter.Convert(content, false).Bytes,
- true,
+ true, new StackDepthGuard(256),
isStream: true);
while (scanner.MoveNext())
@@ -247,7 +247,7 @@ public void SkipsCommentsInStreams()
var nonStreamScanner = new CoreTokenScanner(
StringBytesTestConverter.Convert(content, false).Bytes,
- true,
+ true, new StackDepthGuard(256),
isStream: false);
while (nonStreamScanner.MoveNext())
@@ -293,7 +293,7 @@ 0 0 m
var scanner = new CoreTokenScanner(
StringBytesTestConverter.Convert(content, false).Bytes,
- true,
+ true, new StackDepthGuard(256),
isStream: true);
while (scanner.MoveNext())
diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs
index 2e83a3702..3fa902b5b 100644
--- a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs
@@ -726,7 +726,8 @@ private static PdfTokenScanner GetScanner(string s, TestObjectLocationProvider l
new TestFilterProvider(),
NoOpEncryptionHandler.Instance,
new FileHeaderOffset(0),
- useLenientParsing ? new ParsingOptions() : ParsingOptions.LenientParsingOff);
+ useLenientParsing ? new ParsingOptions() : ParsingOptions.LenientParsingOff,
+ new StackDepthGuard(256));
}
private static IReadOnlyList ReadToEnd(PdfTokenScanner scanner)
diff --git a/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs
index 8c95e41ff..c054d0e24 100644
--- a/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs
@@ -8,12 +8,14 @@
internal sealed class ArrayTokenizer : ITokenizer
{
private readonly bool usePdfDocEncoding;
+ private readonly StackDepthGuard stackDepthGuard;
public bool ReadsNextByte { get; } = false;
- public ArrayTokenizer(bool usePdfDocEncoding)
+ public ArrayTokenizer(bool usePdfDocEncoding, StackDepthGuard stackDepthGuard)
{
this.usePdfDocEncoding = usePdfDocEncoding;
+ this.stackDepthGuard = stackDepthGuard;
}
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
@@ -25,7 +27,7 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok
return false;
}
- var scanner = new CoreTokenScanner(inputBytes, usePdfDocEncoding, ScannerScope.Array);
+ var scanner = new CoreTokenScanner(inputBytes, usePdfDocEncoding, stackDepthGuard, ScannerScope.Array);
var contents = new List();
diff --git a/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs
index 213fdcfc2..655edfc35 100644
--- a/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs
+++ b/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs
@@ -10,6 +10,7 @@ internal class DictionaryTokenizer : ITokenizer
private readonly bool usePdfDocEncoding;
private readonly IReadOnlyList requiredKeys;
private readonly bool useLenientParsing;
+ private readonly StackDepthGuard stackDepthGuard;
public bool ReadsNextByte { get; } = false;
@@ -19,14 +20,16 @@ internal class DictionaryTokenizer : ITokenizer
///
/// Whether to read strings using the PdfDocEncoding.
///
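+ /// <param name="stackDepthGuard">The guard handed to the nested <see cref="CoreTokenScanner"/> that reads the dictionary contents, limiting nesting depth.</param>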
///
/// Can be provided to recover from errors with missing dictionary end symbols if the
/// set of keys expected in the dictionary are known.
///
/// Whether to use lenient parsing.
- public DictionaryTokenizer(bool usePdfDocEncoding, IReadOnlyList requiredKeys = null, bool useLenientParsing = false)
+ public DictionaryTokenizer(bool usePdfDocEncoding, StackDepthGuard stackDepthGuard, IReadOnlyList requiredKeys = null, bool useLenientParsing = false)
{
this.usePdfDocEncoding = usePdfDocEncoding;
+ this.stackDepthGuard = stackDepthGuard;
this.requiredKeys = requiredKeys;
this.useLenientParsing = useLenientParsing;
}
@@ -83,7 +86,7 @@ private bool TryTokenizeInternal(byte currentByte, IInputBytes inputBytes, bool
return false;
}
- var coreScanner = new CoreTokenScanner(inputBytes, usePdfDocEncoding, ScannerScope.Dictionary, useLenientParsing: useLenientParsing);
+ var coreScanner = new CoreTokenScanner(inputBytes, usePdfDocEncoding, stackDepthGuard, ScannerScope.Dictionary, useLenientParsing: useLenientParsing);
var tokens = new List();
diff --git a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs
index 9d74ba031..f1f4e9eac 100644
--- a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs
+++ b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs
@@ -52,12 +52,15 @@ public class CoreTokenScanner : ISeekableTokenScanner
///
private readonly bool isStream;
+ private readonly StackDepthGuard stackDepthGuard;
+
///
/// Create a new from the input.
///
public CoreTokenScanner(
IInputBytes inputBytes,
bool usePdfDocEncoding,
+ StackDepthGuard stackDepthGuard,
ScannerScope scope = ScannerScope.None,
IReadOnlyDictionary> namedDictionaryRequiredKeys = null,
bool useLenientParsing = false,
@@ -65,9 +68,10 @@ public CoreTokenScanner(
{
this.inputBytes = inputBytes ?? throw new ArgumentNullException(nameof(inputBytes));
this.usePdfDocEncoding = usePdfDocEncoding;
+ this.stackDepthGuard = stackDepthGuard;
this.stringTokenizer = new StringTokenizer(usePdfDocEncoding);
- this.arrayTokenizer = new ArrayTokenizer(usePdfDocEncoding);
- this.dictionaryTokenizer = new DictionaryTokenizer(usePdfDocEncoding, useLenientParsing: useLenientParsing);
+ this.arrayTokenizer = new ArrayTokenizer(usePdfDocEncoding, this.stackDepthGuard);
+ this.dictionaryTokenizer = new DictionaryTokenizer(usePdfDocEncoding, this.stackDepthGuard, useLenientParsing: useLenientParsing);
this.scope = scope;
this.namedDictionaryRequiredKeys = namedDictionaryRequiredKeys;
this.useLenientParsing = useLenientParsing;
@@ -101,6 +105,19 @@ public void Seek(long position)
///
public bool MoveNext()
+ {
+ stackDepthGuard.Enter(); // One level per in-flight MoveNext; the array/dictionary tokenizers are handed this same guard.
+ try // Enter stays outside the try: when it throws it has already undone its own increment.
+ {
+ return MoveNextInternal();
+ }
+ finally
+ {
+ stackDepthGuard.Exit(); // Release the level taken above, including when tokenizing throws.
+ }
+ }
+
+ private bool MoveNextInternal()
{
var endAngleBracesRead = 0;
@@ -169,7 +186,7 @@ public bool MoveNext()
&& CurrentToken is NameToken name
&& namedDictionaryRequiredKeys.TryGetValue(name, out var requiredKeys))
{
- tokenizer = new DictionaryTokenizer(usePdfDocEncoding, requiredKeys, useLenientParsing);
+ tokenizer = new DictionaryTokenizer(usePdfDocEncoding, stackDepthGuard, requiredKeys, useLenientParsing);
}
}
else
diff --git a/src/UglyToad.PdfPig/Parser/PageContentParser.cs b/src/UglyToad.PdfPig/Parser/PageContentParser.cs
index 3d317c297..5b3d058cc 100644
--- a/src/UglyToad.PdfPig/Parser/PageContentParser.cs
+++ b/src/UglyToad.PdfPig/Parser/PageContentParser.cs
@@ -21,6 +21,7 @@ public sealed class PageContentParser : IPageContentParser
{
private readonly IGraphicsStateOperationFactory operationFactory;
private readonly bool useLenientParsing;
+ private readonly StackDepthGuard stackDepthGuard;
///
/// Initialises a new instance of the class.
@@ -28,12 +29,14 @@ public sealed class PageContentParser : IPageContentParser
///
/// The factory responsible for creating graphics state operations.
///
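+ /// <param name="stackDepthGuard">The guard passed to the <see cref="CoreTokenScanner"/> created when parsing content, limiting nesting depth.</param>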
///
/// A value indicating whether lenient parsing should be used. Defaults to false.
///
- public PageContentParser(IGraphicsStateOperationFactory operationFactory, bool useLenientParsing = false)
+ public PageContentParser(IGraphicsStateOperationFactory operationFactory, StackDepthGuard stackDepthGuard, bool useLenientParsing = false)
{
this.operationFactory = operationFactory;
+ this.stackDepthGuard = stackDepthGuard;
this.useLenientParsing = useLenientParsing;
}
@@ -55,7 +58,7 @@ public IReadOnlyList Parse(
IInputBytes inputBytes,
ILog log)
{
- var scanner = new CoreTokenScanner(inputBytes, false, useLenientParsing: useLenientParsing);
+ var scanner = new CoreTokenScanner(inputBytes, false, stackDepthGuard, useLenientParsing: useLenientParsing);
var precedingTokens = new List();
var graphicsStateOperations = new List();
diff --git a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs
index 491f79653..9289e2bfb 100644
--- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs
+++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs
@@ -89,7 +89,9 @@ private static PdfDocument Open(IInputBytes inputBytes, ParsingOptions? options
SkipMissingFonts = false
};
- var tokenScanner = new CoreTokenScanner(inputBytes, true, useLenientParsing: options.UseLenientParsing);
+ var stackDepthGuard = new StackDepthGuard(options.MaxStackDepth);
+
+ var tokenScanner = new CoreTokenScanner(inputBytes, true, stackDepthGuard, useLenientParsing: options.UseLenientParsing);
var passwords = new List();
@@ -110,7 +112,7 @@ private static PdfDocument Open(IInputBytes inputBytes, ParsingOptions? options
options.Passwords = passwords;
- var document = OpenDocument(inputBytes, tokenScanner, options);
+ var document = OpenDocument(inputBytes, tokenScanner, options, stackDepthGuard);
return document;
}
@@ -118,7 +120,8 @@ private static PdfDocument Open(IInputBytes inputBytes, ParsingOptions? options
private static PdfDocument OpenDocument(
IInputBytes inputBytes,
ISeekableTokenScanner scanner,
- ParsingOptions parsingOptions)
+ ParsingOptions parsingOptions,
+ StackDepthGuard stackDepthGuard)
{
var filterProvider = new FilterProviderWithLookup(parsingOptions.FilterProvider ?? DefaultFilterProvider.Instance);
@@ -145,7 +148,7 @@ private static PdfDocument OpenDocument(
initialParse.BruteForceOffsets,
inputBytes);
- var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance, fileHeaderOffset, parsingOptions);
+ var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance, fileHeaderOffset, parsingOptions, stackDepthGuard);
var (rootReference, rootDictionary) = ParseTrailer(
trailer,
@@ -182,6 +185,7 @@ private static PdfDocument OpenDocument(
filterProvider,
encodingReader,
cmapCache,
+ stackDepthGuard,
parsingOptions.UseLenientParsing);
var trueTypeHandler = new TrueTypeFontHandler(
@@ -208,7 +212,7 @@ private static PdfDocument OpenDocument(
parsingOptions.UseLenientParsing);
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
- new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, parsingOptions.UseLenientParsing), parsingOptions);
+ new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, stackDepthGuard, parsingOptions.UseLenientParsing), parsingOptions);
var catalog = CatalogFactory.Create(
rootReference,
diff --git a/src/UglyToad.PdfPig/ParsingOptions.cs b/src/UglyToad.PdfPig/ParsingOptions.cs
index c4191c411..2d485ac69 100644
--- a/src/UglyToad.PdfPig/ParsingOptions.cs
+++ b/src/UglyToad.PdfPig/ParsingOptions.cs
@@ -52,6 +52,13 @@ public sealed class ParsingOptions
///
public bool SkipMissingFonts { get; set; } = false;
+ /// <summary>
+ /// Gets or sets the maximum allowed stack depth. Defaults to 256.
+ /// </summary>
+ /// <remarks>This property can be used to limit the depth of recursive or nested operations to
+ /// prevent stack overflows or excessive resource usage. The value is handed to the <c>StackDepthGuard</c> constructor when a document is opened, which rejects zero or negative values.</remarks>
+ public int MaxStackDepth { get; set; } = 256;
+
///
/// Filter provider to use while parsing the document. The will be used if set to null.
///
diff --git a/src/UglyToad.PdfPig/PdfFonts/Parser/CMapParser.cs b/src/UglyToad.PdfPig/PdfFonts/Parser/CMapParser.cs
index 82d7c8109..c5dc5da04 100644
--- a/src/UglyToad.PdfPig/PdfFonts/Parser/CMapParser.cs
+++ b/src/UglyToad.PdfPig/PdfFonts/Parser/CMapParser.cs
@@ -24,6 +24,7 @@ public CMap Parse(IInputBytes inputBytes)
{
var scanner = new CoreTokenScanner(inputBytes,
false,
+ StackDepthGuard.Infinite, // We don't check for stack overflows, we might want to change that.
namedDictionaryRequiredKeys: new Dictionary>
{
{ NameToken.CidSystemInfo, new[] { NameToken.Registry, NameToken.Ordering, NameToken.Supplement } }
diff --git a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs
index 7b4d3df58..34cb0775d 100644
--- a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs
+++ b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs
@@ -21,12 +21,14 @@ internal class Type1FontHandler : IFontHandler
private readonly IEncodingReader encodingReader;
private readonly CMapLocalCache cmapLocalCache;
private readonly bool isLenientParsing;
+ private readonly StackDepthGuard stackDepthGuard;
public Type1FontHandler(
IPdfTokenScanner pdfScanner,
ILookupFilterProvider filterProvider,
IEncodingReader encodingReader,
CMapLocalCache cmapLocalCache,
+ StackDepthGuard stackDepthGuard,
bool isLenientParsing)
{
this.pdfScanner = pdfScanner;
@@ -34,6 +36,7 @@ public Type1FontHandler(
this.encodingReader = encodingReader;
this.cmapLocalCache = cmapLocalCache;
this.isLenientParsing = isLenientParsing;
+ this.stackDepthGuard = stackDepthGuard;
}
public IFont Generate(DictionaryToken dictionary)
@@ -172,7 +175,7 @@ public IFont Generate(DictionaryToken dictionary)
var length1 = stream.StreamDictionary.Get(NameToken.Length1, pdfScanner);
var length2 = stream.StreamDictionary.Get(NameToken.Length2, pdfScanner);
- var font = Type1FontParser.Parse(new MemoryInputBytes(bytes), length1.Int, length2.Int);
+ var font = Type1FontParser.Parse(new MemoryInputBytes(bytes), length1.Int, length2.Int, stackDepthGuard);
return Union.One(font);
}
diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
index e6a9ce9a4..4794e6d49 100644
--- a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
+++ b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs
@@ -51,13 +51,16 @@ internal class PdfTokenScanner : IPdfTokenScanner
public long Length => coreTokenScanner.Length;
+ private readonly StackDepthGuard stackDepthGuard;
+
public PdfTokenScanner(
IInputBytes inputBytes,
IObjectLocationProvider objectLocationProvider,
ILookupFilterProvider filterProvider,
IEncryptionHandler encryptionHandler,
FileHeaderOffset fileHeaderOffset,
- ParsingOptions parsingOptions)
+ ParsingOptions parsingOptions,
+ StackDepthGuard stackDepthGuard)
{
this.inputBytes = inputBytes;
this.objectLocationProvider = objectLocationProvider;
@@ -65,7 +68,8 @@ public PdfTokenScanner(
this.encryptionHandler = encryptionHandler;
this.fileHeaderOffset = fileHeaderOffset;
this.parsingOptions = parsingOptions;
- coreTokenScanner = new CoreTokenScanner(inputBytes, true, useLenientParsing: parsingOptions.UseLenientParsing);
+ this.stackDepthGuard = stackDepthGuard;
+ coreTokenScanner = new CoreTokenScanner(inputBytes, true, stackDepthGuard, useLenientParsing: parsingOptions.UseLenientParsing);
}
public void UpdateEncryptionHandler(IEncryptionHandler newHandler)
@@ -871,6 +875,7 @@ private IReadOnlyList ParseObjectStream(StreamToken stream, long of
var scanner = new CoreTokenScanner(
bytes,
true,
+ stackDepthGuard,
useLenientParsing: parsingOptions.UseLenientParsing,
isStream: true);
diff --git a/src/UglyToad.PdfPig/Writer/NoTextTokenWriter.cs b/src/UglyToad.PdfPig/Writer/NoTextTokenWriter.cs
index e159b4ab5..05edada13 100644
--- a/src/UglyToad.PdfPig/Writer/NoTextTokenWriter.cs
+++ b/src/UglyToad.PdfPig/Writer/NoTextTokenWriter.cs
@@ -74,7 +74,7 @@ private bool TryGetStreamWithoutText(StreamToken streamToken, [NotNullWhen(true)
return false;
}
- var pageContentParser = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance);
+ var pageContentParser = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, StackDepthGuard.Infinite);
IReadOnlyList operations;
try
{
diff --git a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs
index 7bd64aff8..9dc378343 100644
--- a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs
+++ b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs
@@ -347,7 +347,7 @@ public PdfPageBuilder AddPage(PdfDocument document, int pageNumber, AddPageOptio
}
var page = document.GetPage(pageNumber);
- var pcp = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, true);
+ var pcp = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, StackDepthGuard.Infinite, true);
// copy content streams
var streams = new List();