diff --git a/src/UglyToad.PdfPig.Core/StackDepthGuard.cs b/src/UglyToad.PdfPig.Core/StackDepthGuard.cs
new file mode 100644
index 000000000..b116bdf0b
--- /dev/null
+++ b/src/UglyToad.PdfPig.Core/StackDepthGuard.cs
@@ -0,0 +1,63 @@
+namespace UglyToad.PdfPig.Core
+{
+    /// <summary>
+    /// Provides a guard for tracking and limiting the depth of nested stack operations, such as recursive calls or
+    /// nested parsing.
+    /// </summary>
+    /// <remarks>Use this class to prevent excessive stack usage by enforcing a maximum nesting depth. This is
+    /// particularly useful in scenarios where untrusted or deeply nested input could cause stack overflows or
+    /// performance issues. The depth counter is a plain field that is updated without any synchronization.</remarks>
+    public sealed class StackDepthGuard
+    {
+        /// <summary>
+        /// Represents a stack depth guard with no effective limit on the allowed depth (the limit is <c>int.MaxValue</c>).
+        /// </summary>
+        /// <remarks>Use this instance when stack depth restrictions are not required. It is a single shared instance whose counter is still updated by Enter and Exit.</remarks>
+        public static readonly StackDepthGuard Infinite = new StackDepthGuard(int.MaxValue);
+
+        private readonly int maxStackDepth;
+
+        private int depth;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="StackDepthGuard"/> class with the specified maximum stack depth.
+        /// </summary>
+        /// <param name="maxStackDepth">The maximum allowed stack depth for guarded operations. Must be a positive integer; zero or negative values throw <see cref="System.ArgumentOutOfRangeException"/>.</param>
+        public StackDepthGuard(int maxStackDepth)
+        {
+            if (maxStackDepth <= 0)
+            {
+                throw new ArgumentOutOfRangeException(nameof(maxStackDepth));
+            }
+            this.maxStackDepth = maxStackDepth;
+        }
+
+        /// <summary>
+        /// Increments the current nesting depth and checks it against the maximum allowed stack depth. A failed call rolls its increment back before throwing.
+        /// </summary>
+        /// <exception cref="PdfDocumentFormatException">Thrown if the maximum allowed nesting depth is exceeded.</exception>
+        public void Enter()
+        {
+            if (++depth > maxStackDepth)
+            {
+                depth--; // Roll back the increment so Enter/Exit stay balanced if a caller catches this exception.
+                throw new PdfDocumentFormatException($"Exceeded maximum nesting depth of {maxStackDepth}.");
+            }
+        }
+
+        /// <summary>
+        /// Decreases the current depth level by one, ensuring that the depth does not become negative.
+        /// </summary>
+        /// <remarks>If the current depth is already zero, calling this method has no effect. This method
+        /// is typically used to track or manage nested operations or scopes where depth must remain
+        /// non-negative. An unbalanced extra call is silently clamped to zero rather than reported.</remarks>
+        public void Exit()
+        {
+            depth--;
+            if (depth < 0)
+            {
+                depth = 0;
+            }
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
index bd64e98d9..1b14a8c66 100644
--- a/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
+++ b/src/UglyToad.PdfPig.Fonts/Type1/Parser/Type1FontParser.cs
@@ -21,15 +21,16 @@ public static class Type1FontParser
         private static readonly char[] Separators = [' '];
 
         private static readonly Type1EncryptedPortionParser EncryptedPortionParser = new Type1EncryptedPortionParser();
-        
+
         /// <summary>
         /// Parses an embedded Adobe Type 1 font file.
         /// </summary>
         /// <param name="inputBytes">The bytes of the font program.</param>
         /// <param name="length1">The length in bytes of the clear text portion of the font program.</param>
         /// <param name="length2">The length in bytes of the encrypted portion of the font program.</param>
+        /// <param name="stackDepthGuard">The guard handed to the token scanner to limit nesting depth while the font program is tokenized.</param>
         /// <returns>The parsed type 1 font.</returns>
-        public static Type1Font Parse(IInputBytes inputBytes, int length1, int length2)
+        public static Type1Font Parse(IInputBytes inputBytes, int length1, int length2, StackDepthGuard stackDepthGuard)
         {
             // Sometimes the entire PFB file including the header bytes can be included which prevents parsing in the normal way.
var isEntirePfbFile = inputBytes.Peek() == PfbFileIndicator; @@ -44,7 +45,7 @@ public static Type1Font Parse(IInputBytes inputBytes, int length1, int length2) inputBytes = new MemoryInputBytes(ascii); } - var scanner = new CoreTokenScanner(inputBytes, false); + var scanner = new CoreTokenScanner(inputBytes, false, stackDepthGuard); if (!scanner.TryReadToken(out CommentToken comment) || !comment.Data.StartsWith("!")) { diff --git a/src/UglyToad.PdfPig.Tests/Fonts/Cmap/CodespaceRangeTests.cs b/src/UglyToad.PdfPig.Tests/Fonts/Cmap/CodespaceRangeTests.cs index dbe6611e7..39193a053 100644 --- a/src/UglyToad.PdfPig.Tests/Fonts/Cmap/CodespaceRangeTests.cs +++ b/src/UglyToad.PdfPig.Tests/Fonts/Cmap/CodespaceRangeTests.cs @@ -105,7 +105,7 @@ public void ColorspaceParserError() { var parser = new CodespaceRangeParser(); var byteArrayInput = new MemoryInputBytes(OtherEncodings.StringAsLatin1Bytes("1 begincodespacerange\nendcodespacerange")); - var tokenScanner = new CoreTokenScanner(byteArrayInput, false); + var tokenScanner = new CoreTokenScanner(byteArrayInput, false, new StackDepthGuard(256)); Assert.True(tokenScanner.MoveNext()); Assert.True(tokenScanner.CurrentToken is NumericToken); diff --git a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs index d7be60f71..ba2ab9cd7 100644 --- a/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs +++ b/src/UglyToad.PdfPig.Tests/Fonts/Type1/Type1FontParserTests.cs @@ -11,7 +11,7 @@ public void CanReadHexEncryptedPortion() { var bytes = GetFileBytes("AdobeUtopia.pfa"); - Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0); + Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256)); } [Fact] @@ -20,7 +20,7 @@ public void CanReadBinaryEncryptedPortionOfFullPfb() // TODO: support reading in these pfb files var bytes = GetFileBytes("Raleway-Black.pfb"); - Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0); + 
Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256)); } [Fact] @@ -28,7 +28,7 @@ public void CanReadCharStrings() { var bytes = GetFileBytes("CMBX10.pfa"); - Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0); + Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256)); } [Fact] @@ -36,7 +36,7 @@ public void CanReadEncryptedPortion() { var bytes = GetFileBytes("CMCSC10"); - Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0); + Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256)); } [Fact] @@ -44,7 +44,7 @@ public void CanReadAsciiPart() { var bytes = GetFileBytes("CMBX12"); - Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0); + Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256)); } [Fact] @@ -52,7 +52,7 @@ public void OutputCmbx10Svgs() { var bytes = GetFileBytes("CMBX10"); - var result = Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0); + var result = Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256)); var builder = new StringBuilder(""); foreach (var charString in result.CharStrings.CharStrings) @@ -71,7 +71,7 @@ public void CanReadFontWithCommentsInOtherSubrs() { var bytes = GetFileBytes("CMR10"); - Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0); + Type1FontParser.Parse(new MemoryInputBytes(bytes), 0, 0, new StackDepthGuard(256)); } private static byte[] GetFileBytes(string name) diff --git a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs index 25ae26a63..8d2a48bea 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/GithubIssuesTests.cs @@ -11,6 +11,20 @@ public class GithubIssuesTests { + [Fact] + public void Issue1217() + { + var path = IntegrationHelpers.GetSpecificTestDocumentPath("stackoverflow_error.pdf"); + + var options = new 
ParsingOptions() + { + UseLenientParsing = true, + MaxStackDepth = 100 + }; + var ex = Assert.Throws(() => PdfDocument.Open(path, options)); + Assert.Equal($"Exceeded maximum nesting depth of {options.MaxStackDepth}.", ex.Message); + } + [Fact] public void Issue1223() { diff --git a/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/stackoverflow_error.pdf b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/stackoverflow_error.pdf new file mode 100644 index 000000000..f20a87ab3 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/SpecificTestDocuments/stackoverflow_error.pdf differ diff --git a/src/UglyToad.PdfPig.Tests/Parser/FileStructure/FirstPassParserTests.cs b/src/UglyToad.PdfPig.Tests/Parser/FileStructure/FirstPassParserTests.cs index f4ab1c5e8..d47cec01d 100644 --- a/src/UglyToad.PdfPig.Tests/Parser/FileStructure/FirstPassParserTests.cs +++ b/src/UglyToad.PdfPig.Tests/Parser/FileStructure/FirstPassParserTests.cs @@ -42,7 +42,7 @@ 0000000576 00000 n var results = FirstPassParser.Parse( new FileHeaderOffset(0), ib.Bytes, - new CoreTokenScanner(ib.Bytes, true)); + new CoreTokenScanner(ib.Bytes, true, new StackDepthGuard(256))); Assert.Equal(2, results.Parts.Count); Assert.NotNull(results.Trailer); @@ -114,7 +114,7 @@ 0000004385 00000 n var ib = StringBytesTestConverter.Convert(content, false); - var results = FirstPassParser.Parse(new FileHeaderOffset(0), ib.Bytes, new CoreTokenScanner(ib.Bytes, true)); + var results = FirstPassParser.Parse(new FileHeaderOffset(0), ib.Bytes, new CoreTokenScanner(ib.Bytes, true, new StackDepthGuard(256))); var offsets = results.Parts.Select(x => x.Offset).OrderBy(x => x).ToList(); @@ -123,7 +123,7 @@ 0000004385 00000 n Assert.NotNull(results.Trailer); ib.Bytes.Seek(98); - var scanner = new CoreTokenScanner(ib.Bytes, false); + var scanner = new CoreTokenScanner(ib.Bytes, false, new StackDepthGuard(256)); scanner.MoveNext(); Assert.Equal(scanner.CurrentToken, OperatorToken.Xref); } diff --git 
a/src/UglyToad.PdfPig.Tests/Parser/PageContentParserTests.cs b/src/UglyToad.PdfPig.Tests/Parser/PageContentParserTests.cs index 4ced371b2..b88d169e4 100644 --- a/src/UglyToad.PdfPig.Tests/Parser/PageContentParserTests.cs +++ b/src/UglyToad.PdfPig.Tests/Parser/PageContentParserTests.cs @@ -15,7 +15,7 @@ public class PageContentParserTests { - private readonly PageContentParser parser = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance); + private readonly PageContentParser parser = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, new StackDepthGuard(256)); private readonly ILog log = new NoOpLog(); [Fact] @@ -210,7 +210,7 @@ public void CorrectlyHandlesFile0007511CorruptInlineImage() var content = File.ReadAllText(path); var input = StringBytesTestConverter.Convert(content, false); - var lenientParser = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, true); + var lenientParser = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, new StackDepthGuard(256), true); var result = lenientParser.Parse(1, input.Bytes, log); Assert.NotEmpty(result); diff --git a/src/UglyToad.PdfPig.Tests/Parser/Parts/FileHeaderParserTests.cs b/src/UglyToad.PdfPig.Tests/Parser/Parts/FileHeaderParserTests.cs index a0de77930..2d2ed5f79 100644 --- a/src/UglyToad.PdfPig.Tests/Parser/Parts/FileHeaderParserTests.cs +++ b/src/UglyToad.PdfPig.Tests/Parser/Parts/FileHeaderParserTests.cs @@ -149,7 +149,7 @@ public void Issue334() var bytes = new MemoryInputBytes(input); - var scanner = new CoreTokenScanner(bytes, true, ScannerScope.None); + var scanner = new CoreTokenScanner(bytes, true, new StackDepthGuard(256), ScannerScope.None); var result = FileHeaderParser.Parse(scanner, bytes, false, log); diff --git a/src/UglyToad.PdfPig.Tests/Parser/Parts/FileTrailerParserTests.cs b/src/UglyToad.PdfPig.Tests/Parser/Parts/FileTrailerParserTests.cs index b0e8ee4ae..505105381 100644 --- 
a/src/UglyToad.PdfPig.Tests/Parser/Parts/FileTrailerParserTests.cs +++ b/src/UglyToad.PdfPig.Tests/Parser/Parts/FileTrailerParserTests.cs @@ -1,5 +1,6 @@ namespace UglyToad.PdfPig.Tests.Parser.Parts; +using PdfPig.Core; using PdfPig.Parser.FileStructure; using PdfPig.Tokenization.Scanner; @@ -26,7 +27,7 @@ 12 0 obj var result = FirstPassParser.GetFirstCrossReferenceOffset( input.Bytes, - new CoreTokenScanner(input.Bytes, true), + new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)), new TestingLog()); Assert.Equal(456, result.StartXRefDeclaredOffset); @@ -59,7 +60,7 @@ 12 0 obj var result = FirstPassParser.GetFirstCrossReferenceOffset( input.Bytes, - new CoreTokenScanner(input.Bytes, true), + new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)), new TestingLog()); Assert.Equal(17, result.StartXRefDeclaredOffset); @@ -93,7 +94,7 @@ 12 0 obj var result = FirstPassParser.GetFirstCrossReferenceOffset( input.Bytes, - new CoreTokenScanner(input.Bytes, true), + new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)), new TestingLog()); Assert.Equal(1384733, result.StartXRefDeclaredOffset); @@ -106,7 +107,7 @@ public void BadInputBytesReturnsNull() var result = FirstPassParser.GetFirstCrossReferenceOffset( input.Bytes, - new CoreTokenScanner(input.Bytes, true), + new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)), new TestingLog()); Assert.Null(result.StartXRefDeclaredOffset); @@ -130,7 +131,7 @@ 11 0 obj var result = FirstPassParser.GetFirstCrossReferenceOffset( input.Bytes, - new CoreTokenScanner(input.Bytes, true), + new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)), new TestingLog()); Assert.Null(result.StartXRefDeclaredOffset); @@ -151,7 +152,7 @@ 1 0 obj var result = FirstPassParser.GetFirstCrossReferenceOffset( input.Bytes, - new CoreTokenScanner(input.Bytes, true), + new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)), new TestingLog()); Assert.Null(result.StartXRefDeclaredOffset); @@ 
-185,7 +186,7 @@ 12 0 obj var result = FirstPassParser.GetFirstCrossReferenceOffset( input.Bytes, - new CoreTokenScanner(input.Bytes, true), + new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)), new TestingLog()); Assert.Equal(1274665676543, result.StartXRefDeclaredOffset); @@ -207,7 +208,7 @@ public void CanReadStartXrefIfCommentsPresent() var result = FirstPassParser.GetFirstCrossReferenceOffset( input.Bytes, - new CoreTokenScanner(input.Bytes, true), + new CoreTokenScanner(input.Bytes, true, new StackDepthGuard(256)), new TestingLog()); Assert.Equal(57695, result.StartXRefDeclaredOffset); diff --git a/src/UglyToad.PdfPig.Tests/StringBytesTestConverter.cs b/src/UglyToad.PdfPig.Tests/StringBytesTestConverter.cs index d91647d49..537f133ce 100644 --- a/src/UglyToad.PdfPig.Tests/StringBytesTestConverter.cs +++ b/src/UglyToad.PdfPig.Tests/StringBytesTestConverter.cs @@ -34,7 +34,7 @@ public class Result internal static (CoreTokenScanner scanner, IInputBytes bytes) Scanner(string s) { var inputBytes = new MemoryInputBytes(OtherEncodings.StringAsLatin1Bytes(s)); - var result = new CoreTokenScanner(inputBytes, true); + var result = new CoreTokenScanner(inputBytes, true, new StackDepthGuard(256)); return (result, inputBytes); } diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/ArrayTokenizerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/ArrayTokenizerTests.cs index 7c0d2d81e..93478cff4 100644 --- a/src/UglyToad.PdfPig.Tests/Tokenization/ArrayTokenizerTests.cs +++ b/src/UglyToad.PdfPig.Tests/Tokenization/ArrayTokenizerTests.cs @@ -1,11 +1,12 @@ namespace UglyToad.PdfPig.Tests.Tokenization { + using PdfPig.Core; using PdfPig.Tokenization; using PdfPig.Tokens; public class ArrayTokenizerTests { - private readonly ArrayTokenizer tokenizer = new ArrayTokenizer(true); + private readonly ArrayTokenizer tokenizer = new ArrayTokenizer(true, new StackDepthGuard(256)); [Theory] [InlineData("]")] diff --git 
a/src/UglyToad.PdfPig.Tests/Tokenization/DictionaryTokenizerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/DictionaryTokenizerTests.cs index 97031fe41..882f84d21 100644 --- a/src/UglyToad.PdfPig.Tests/Tokenization/DictionaryTokenizerTests.cs +++ b/src/UglyToad.PdfPig.Tests/Tokenization/DictionaryTokenizerTests.cs @@ -7,7 +7,7 @@ namespace UglyToad.PdfPig.Tests.Tokenization public class DictionaryTokenizerTests { - private readonly DictionaryTokenizer tokenizer = new DictionaryTokenizer(true); + private readonly DictionaryTokenizer tokenizer = new DictionaryTokenizer(true, new StackDepthGuard(256)); [Theory] [InlineData("[rjee]")] diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/CoreTokenScannerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/CoreTokenScannerTests.cs index e4eb551a6..e36189097 100644 --- a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/CoreTokenScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/CoreTokenScannerTests.cs @@ -11,7 +11,7 @@ public class CoreTokenScannerTests public CoreTokenScannerTests() { - scannerFactory = x => new CoreTokenScanner(x, true); + scannerFactory = x => new CoreTokenScanner(x, true, new StackDepthGuard(256)); } [Fact] @@ -231,7 +231,7 @@ public void SkipsCommentsInStreams() var scanner = new CoreTokenScanner( StringBytesTestConverter.Convert(content, false).Bytes, - true, + true, new StackDepthGuard(256), isStream: true); while (scanner.MoveNext()) @@ -247,7 +247,7 @@ public void SkipsCommentsInStreams() var nonStreamScanner = new CoreTokenScanner( StringBytesTestConverter.Convert(content, false).Bytes, - true, + true, new StackDepthGuard(256), isStream: false); while (nonStreamScanner.MoveNext()) @@ -293,7 +293,7 @@ 0 0 m var scanner = new CoreTokenScanner( StringBytesTestConverter.Convert(content, false).Bytes, - true, + true, new StackDepthGuard(256), isStream: true); while (scanner.MoveNext()) diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs 
b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs index 2e83a3702..3fa902b5b 100644 --- a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs @@ -726,7 +726,8 @@ private static PdfTokenScanner GetScanner(string s, TestObjectLocationProvider l new TestFilterProvider(), NoOpEncryptionHandler.Instance, new FileHeaderOffset(0), - useLenientParsing ? new ParsingOptions() : ParsingOptions.LenientParsingOff); + useLenientParsing ? new ParsingOptions() : ParsingOptions.LenientParsingOff, + new StackDepthGuard(256)); } private static IReadOnlyList ReadToEnd(PdfTokenScanner scanner) diff --git a/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs index 8c95e41ff..c054d0e24 100644 --- a/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs +++ b/src/UglyToad.PdfPig.Tokenization/ArrayTokenizer.cs @@ -8,12 +8,14 @@ internal sealed class ArrayTokenizer : ITokenizer { private readonly bool usePdfDocEncoding; + private readonly StackDepthGuard stackDepthGuard; public bool ReadsNextByte { get; } = false; - public ArrayTokenizer(bool usePdfDocEncoding) + public ArrayTokenizer(bool usePdfDocEncoding, StackDepthGuard stackDepthGuard) { this.usePdfDocEncoding = usePdfDocEncoding; + this.stackDepthGuard = stackDepthGuard; } public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) @@ -25,7 +27,7 @@ public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken tok return false; } - var scanner = new CoreTokenScanner(inputBytes, usePdfDocEncoding, ScannerScope.Array); + var scanner = new CoreTokenScanner(inputBytes, usePdfDocEncoding, stackDepthGuard, ScannerScope.Array); var contents = new List(); diff --git a/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs b/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs index 213fdcfc2..655edfc35 100644 --- 
a/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs +++ b/src/UglyToad.PdfPig.Tokenization/DictionaryTokenizer.cs @@ -10,6 +10,7 @@ internal class DictionaryTokenizer : ITokenizer private readonly bool usePdfDocEncoding; private readonly IReadOnlyList requiredKeys; private readonly bool useLenientParsing; + private readonly StackDepthGuard stackDepthGuard; public bool ReadsNextByte { get; } = false; @@ -19,14 +20,16 @@ internal class DictionaryTokenizer : ITokenizer /// /// Whether to read strings using the PdfDocEncoding. /// + /// /// /// Can be provided to recover from errors with missing dictionary end symbols if the /// set of keys expected in the dictionary are known. /// /// Whether to use lenient parsing. - public DictionaryTokenizer(bool usePdfDocEncoding, IReadOnlyList requiredKeys = null, bool useLenientParsing = false) + public DictionaryTokenizer(bool usePdfDocEncoding, StackDepthGuard stackDepthGuard, IReadOnlyList requiredKeys = null, bool useLenientParsing = false) { this.usePdfDocEncoding = usePdfDocEncoding; + this.stackDepthGuard = stackDepthGuard; this.requiredKeys = requiredKeys; this.useLenientParsing = useLenientParsing; } @@ -83,7 +86,7 @@ private bool TryTokenizeInternal(byte currentByte, IInputBytes inputBytes, bool return false; } - var coreScanner = new CoreTokenScanner(inputBytes, usePdfDocEncoding, ScannerScope.Dictionary, useLenientParsing: useLenientParsing); + var coreScanner = new CoreTokenScanner(inputBytes, usePdfDocEncoding, stackDepthGuard, ScannerScope.Dictionary, useLenientParsing: useLenientParsing); var tokens = new List(); diff --git a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs index 9d74ba031..f1f4e9eac 100644 --- a/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs +++ b/src/UglyToad.PdfPig.Tokenization/Scanner/CoreTokenScanner.cs @@ -52,12 +52,15 @@ public class CoreTokenScanner : ISeekableTokenScanner /// private readonly 
bool isStream; + private readonly StackDepthGuard stackDepthGuard; + /// /// Create a new from the input. /// public CoreTokenScanner( IInputBytes inputBytes, bool usePdfDocEncoding, + StackDepthGuard stackDepthGuard, ScannerScope scope = ScannerScope.None, IReadOnlyDictionary> namedDictionaryRequiredKeys = null, bool useLenientParsing = false, @@ -65,9 +68,10 @@ public CoreTokenScanner( { this.inputBytes = inputBytes ?? throw new ArgumentNullException(nameof(inputBytes)); this.usePdfDocEncoding = usePdfDocEncoding; + this.stackDepthGuard = stackDepthGuard; this.stringTokenizer = new StringTokenizer(usePdfDocEncoding); - this.arrayTokenizer = new ArrayTokenizer(usePdfDocEncoding); - this.dictionaryTokenizer = new DictionaryTokenizer(usePdfDocEncoding, useLenientParsing: useLenientParsing); + this.arrayTokenizer = new ArrayTokenizer(usePdfDocEncoding, this.stackDepthGuard); + this.dictionaryTokenizer = new DictionaryTokenizer(usePdfDocEncoding, this.stackDepthGuard, useLenientParsing: useLenientParsing); this.scope = scope; this.namedDictionaryRequiredKeys = namedDictionaryRequiredKeys; this.useLenientParsing = useLenientParsing; @@ -101,6 +105,19 @@ public void Seek(long position) /// public bool MoveNext() + { + stackDepthGuard.Enter(); + try + { + return MoveNextInternal(); + } + finally + { + stackDepthGuard.Exit(); + } + } + + private bool MoveNextInternal() { var endAngleBracesRead = 0; @@ -169,7 +186,7 @@ public bool MoveNext() && CurrentToken is NameToken name && namedDictionaryRequiredKeys.TryGetValue(name, out var requiredKeys)) { - tokenizer = new DictionaryTokenizer(usePdfDocEncoding, requiredKeys, useLenientParsing); + tokenizer = new DictionaryTokenizer(usePdfDocEncoding, stackDepthGuard, requiredKeys, useLenientParsing); } } else diff --git a/src/UglyToad.PdfPig/Parser/PageContentParser.cs b/src/UglyToad.PdfPig/Parser/PageContentParser.cs index 3d317c297..5b3d058cc 100644 --- a/src/UglyToad.PdfPig/Parser/PageContentParser.cs +++ 
b/src/UglyToad.PdfPig/Parser/PageContentParser.cs @@ -21,6 +21,7 @@ public sealed class PageContentParser : IPageContentParser { private readonly IGraphicsStateOperationFactory operationFactory; private readonly bool useLenientParsing; + private readonly StackDepthGuard stackDepthGuard; /// /// Initialises a new instance of the class. @@ -28,12 +29,14 @@ public sealed class PageContentParser : IPageContentParser /// /// The factory responsible for creating graphics state operations. /// + /// /// /// A value indicating whether lenient parsing should be used. Defaults to false. /// - public PageContentParser(IGraphicsStateOperationFactory operationFactory, bool useLenientParsing = false) + public PageContentParser(IGraphicsStateOperationFactory operationFactory, StackDepthGuard stackDepthGuard, bool useLenientParsing = false) { this.operationFactory = operationFactory; + this.stackDepthGuard = stackDepthGuard; this.useLenientParsing = useLenientParsing; } @@ -55,7 +58,7 @@ public IReadOnlyList Parse( IInputBytes inputBytes, ILog log) { - var scanner = new CoreTokenScanner(inputBytes, false, useLenientParsing: useLenientParsing); + var scanner = new CoreTokenScanner(inputBytes, false, stackDepthGuard, useLenientParsing: useLenientParsing); var precedingTokens = new List(); var graphicsStateOperations = new List(); diff --git a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs index 491f79653..9289e2bfb 100644 --- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs @@ -89,7 +89,9 @@ private static PdfDocument Open(IInputBytes inputBytes, ParsingOptions? 
options SkipMissingFonts = false }; - var tokenScanner = new CoreTokenScanner(inputBytes, true, useLenientParsing: options.UseLenientParsing); + var stackDepthGuard = new StackDepthGuard(options.MaxStackDepth); + + var tokenScanner = new CoreTokenScanner(inputBytes, true, stackDepthGuard, useLenientParsing: options.UseLenientParsing); var passwords = new List(); @@ -110,7 +112,7 @@ private static PdfDocument Open(IInputBytes inputBytes, ParsingOptions? options options.Passwords = passwords; - var document = OpenDocument(inputBytes, tokenScanner, options); + var document = OpenDocument(inputBytes, tokenScanner, options, stackDepthGuard); return document; } @@ -118,7 +120,8 @@ private static PdfDocument Open(IInputBytes inputBytes, ParsingOptions? options private static PdfDocument OpenDocument( IInputBytes inputBytes, ISeekableTokenScanner scanner, - ParsingOptions parsingOptions) + ParsingOptions parsingOptions, + StackDepthGuard stackDepthGuard) { var filterProvider = new FilterProviderWithLookup(parsingOptions.FilterProvider ?? 
DefaultFilterProvider.Instance); @@ -145,7 +148,7 @@ private static PdfDocument OpenDocument( initialParse.BruteForceOffsets, inputBytes); - var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance, fileHeaderOffset, parsingOptions); + var pdfScanner = new PdfTokenScanner(inputBytes, locationProvider, filterProvider, NoOpEncryptionHandler.Instance, fileHeaderOffset, parsingOptions, stackDepthGuard); var (rootReference, rootDictionary) = ParseTrailer( trailer, @@ -182,6 +185,7 @@ private static PdfDocument OpenDocument( filterProvider, encodingReader, cmapCache, + stackDepthGuard, parsingOptions.UseLenientParsing); var trueTypeHandler = new TrueTypeFontHandler( @@ -208,7 +212,7 @@ private static PdfDocument OpenDocument( parsingOptions.UseLenientParsing); var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider, - new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, parsingOptions.UseLenientParsing), parsingOptions); + new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, stackDepthGuard, parsingOptions.UseLenientParsing), parsingOptions); var catalog = CatalogFactory.Create( rootReference, diff --git a/src/UglyToad.PdfPig/ParsingOptions.cs b/src/UglyToad.PdfPig/ParsingOptions.cs index c4191c411..2d485ac69 100644 --- a/src/UglyToad.PdfPig/ParsingOptions.cs +++ b/src/UglyToad.PdfPig/ParsingOptions.cs @@ -52,6 +52,13 @@ public sealed class ParsingOptions /// public bool SkipMissingFonts { get; set; } = false; + /// + /// Gets or sets the maximum allowed stack depth. + /// + /// This property can be used to limit the depth of recursive or nested operations to + /// prevent stack overflows or excessive resource usage. + public int MaxStackDepth { get; set; } = 256; + /// /// Filter provider to use while parsing the document. The will be used if set to null. 
/// diff --git a/src/UglyToad.PdfPig/PdfFonts/Parser/CMapParser.cs b/src/UglyToad.PdfPig/PdfFonts/Parser/CMapParser.cs index 82d7c8109..c5dc5da04 100644 --- a/src/UglyToad.PdfPig/PdfFonts/Parser/CMapParser.cs +++ b/src/UglyToad.PdfPig/PdfFonts/Parser/CMapParser.cs @@ -24,6 +24,7 @@ public CMap Parse(IInputBytes inputBytes) { var scanner = new CoreTokenScanner(inputBytes, false, + StackDepthGuard.Infinite, // We don't check for stack overflows, we might want to change that. namedDictionaryRequiredKeys: new Dictionary> { { NameToken.CidSystemInfo, new[] { NameToken.Registry, NameToken.Ordering, NameToken.Supplement } } diff --git a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs index 7b4d3df58..34cb0775d 100644 --- a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs +++ b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs @@ -21,12 +21,14 @@ internal class Type1FontHandler : IFontHandler private readonly IEncodingReader encodingReader; private readonly CMapLocalCache cmapLocalCache; private readonly bool isLenientParsing; + private readonly StackDepthGuard stackDepthGuard; public Type1FontHandler( IPdfTokenScanner pdfScanner, ILookupFilterProvider filterProvider, IEncodingReader encodingReader, CMapLocalCache cmapLocalCache, + StackDepthGuard stackDepthGuard, bool isLenientParsing) { this.pdfScanner = pdfScanner; @@ -34,6 +36,7 @@ public Type1FontHandler( this.encodingReader = encodingReader; this.cmapLocalCache = cmapLocalCache; this.isLenientParsing = isLenientParsing; + this.stackDepthGuard = stackDepthGuard; } public IFont Generate(DictionaryToken dictionary) @@ -172,7 +175,7 @@ public IFont Generate(DictionaryToken dictionary) var length1 = stream.StreamDictionary.Get(NameToken.Length1, pdfScanner); var length2 = stream.StreamDictionary.Get(NameToken.Length2, pdfScanner); - var font = Type1FontParser.Parse(new MemoryInputBytes(bytes), length1.Int, 
length2.Int); + var font = Type1FontParser.Parse(new MemoryInputBytes(bytes), length1.Int, length2.Int, stackDepthGuard); return Union.One(font); } diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs index e6a9ce9a4..4794e6d49 100644 --- a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs +++ b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs @@ -51,13 +51,16 @@ internal class PdfTokenScanner : IPdfTokenScanner public long Length => coreTokenScanner.Length; + private readonly StackDepthGuard stackDepthGuard; + public PdfTokenScanner( IInputBytes inputBytes, IObjectLocationProvider objectLocationProvider, ILookupFilterProvider filterProvider, IEncryptionHandler encryptionHandler, FileHeaderOffset fileHeaderOffset, - ParsingOptions parsingOptions) + ParsingOptions parsingOptions, + StackDepthGuard stackDepthGuard) { this.inputBytes = inputBytes; this.objectLocationProvider = objectLocationProvider; @@ -65,7 +68,8 @@ public PdfTokenScanner( this.encryptionHandler = encryptionHandler; this.fileHeaderOffset = fileHeaderOffset; this.parsingOptions = parsingOptions; - coreTokenScanner = new CoreTokenScanner(inputBytes, true, useLenientParsing: parsingOptions.UseLenientParsing); + this.stackDepthGuard = stackDepthGuard; + coreTokenScanner = new CoreTokenScanner(inputBytes, true, stackDepthGuard, useLenientParsing: parsingOptions.UseLenientParsing); } public void UpdateEncryptionHandler(IEncryptionHandler newHandler) @@ -871,6 +875,7 @@ private IReadOnlyList ParseObjectStream(StreamToken stream, long of var scanner = new CoreTokenScanner( bytes, true, + stackDepthGuard, useLenientParsing: parsingOptions.UseLenientParsing, isStream: true); diff --git a/src/UglyToad.PdfPig/Writer/NoTextTokenWriter.cs b/src/UglyToad.PdfPig/Writer/NoTextTokenWriter.cs index e159b4ab5..05edada13 100644 --- a/src/UglyToad.PdfPig/Writer/NoTextTokenWriter.cs +++ 
b/src/UglyToad.PdfPig/Writer/NoTextTokenWriter.cs @@ -74,7 +74,7 @@ private bool TryGetStreamWithoutText(StreamToken streamToken, [NotNullWhen(true) return false; } - var pageContentParser = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance); + var pageContentParser = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, StackDepthGuard.Infinite); IReadOnlyList operations; try { diff --git a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs index 7bd64aff8..9dc378343 100644 --- a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs +++ b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs @@ -347,7 +347,7 @@ public PdfPageBuilder AddPage(PdfDocument document, int pageNumber, AddPageOptio } var page = document.GetPage(pageNumber); - var pcp = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, true); + var pcp = new PageContentParser(ReflectionGraphicsStateOperationFactory.Instance, StackDepthGuard.Infinite, true); // copy content streams var streams = new List();