diff --git a/examples/AdvancedMerge.cs b/examples/AdvancedMerge.cs
new file mode 100644
index 000000000..95d57cbcc
--- /dev/null
+++ b/examples/AdvancedMerge.cs
@@ -0,0 +1,203 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using UglyToad.PdfPig;
+using UglyToad.PdfPig.Core;
+using UglyToad.PdfPig.Tokens;
+using UglyToad.PdfPig.Writer;
+
+namespace UglyToad.Examples;
+
+public class AdvancedMerge
+{
+    /// <summary>
+    /// Appends the content of a single-page PDF to the PDF already stored in <paramref name="output"/>
+    /// by writing an incremental update at the end of the stream.
+    /// Every object of the input except its catalog, page tree root and page is copied under a fresh
+    /// object number, the last page of the output is rewritten so that its Contents and Resources point
+    /// at the copied objects, and a new cross-reference table chained to the previous one is written.
+    /// </summary>
+    /// <param name="input1">Stream containing the single-page PDF to copy from.</param>
+    /// <param name="output">Stream that already contains the target PDF; it is read, sought and appended to.</param>
+    /// <exception cref="ArgumentNullException">A stream is null.</exception>
+    /// <exception cref="ArgumentException">A document does not have the structure this example expects.</exception>
+    public static void Run(Stream input1, Stream output)
+    {
+        if (input1 is null)
+        {
+            throw new ArgumentNullException(nameof(input1));
+        }
+
+        if (output is null)
+        {
+            throw new ArgumentNullException(nameof(output));
+        }
+
+        using var pdf = PdfDocument.Open(input1);
+
+        if (!pdf.Structure.Catalog.CatalogDictionary.TryGet<IndirectReferenceToken>(NameToken.Pages, out var pages))
+        {
+            throw new ArgumentException("No pages reference were found");
+        }
+
+        if (ResolveIndirect(pdf, pages) is not DictionaryToken pagesObj)
+        {
+            throw new ArgumentException("No pages object were found");
+        }
+
+        // Assume, we have only 1 page in here
+        if (!pagesObj.TryGet(NameToken.Kids, out ArrayToken kids) || kids.Length != 1)
+        {
+            throw new ArgumentException("Input page tree must contain exactly one page");
+        }
+
+        if (kids.Data[0] is not IndirectReferenceToken kidReference
+            || ResolveIndirect(pdf, kidReference) is not DictionaryToken pageObj)
+        {
+            throw new ArgumentException("Input page must be an indirect reference to a page dictionary");
+        }
+
+        // Skip all pdf meta structure objects
+        var skippedRefs = new HashSet<IndirectReference>
+        {
+            pages.Data, // Pages
+            kidReference.Data, // Page
+            pdf.Structure.Trailer.Root, // Catalog
+        };
+
+        // Skip all refs from "skippedRefs" and order it by object number
+        var oldRefs = pdf.Structure.CrossReferenceTable.ObjectOffsets.Keys
+            .Where(k => !skippedRefs.Contains(k))
+            .OrderBy(k => k.ObjectNumber)
+            .ToList();
+
+        using var outputPdf = PdfDocument.Open(output);
+
+        // Building refs map, to rebind old objects to their new values.
+        // The map must be complete before any object is rewritten, because objects may reference
+        // other objects with a higher number.
+        var refMap = new Dictionary<IndirectReference, IndirectReference>();
+        var currentObjectNumber = outputPdf.Structure.Trailer.Size;
+        foreach (var oldRef in oldRefs)
+        {
+            refMap[oldRef] = new IndirectReference(currentObjectNumber++, 0);
+        }
+
+        output.Seek(0, SeekOrigin.End);
+        output.WriteByte((byte)'\n'); // without endline pdf wouldn't render in some readers
+
+        var newPdfObjects = new Dictionary<IndirectReference, XrefLocation>();
+
+        foreach (var oldRef in oldRefs)
+        {
+            var newObjRef = refMap[oldRef];
+            var token = ResolveIndirect(pdf, oldRef);
+            var updatedToken = ReplaceReferences(token, refMap);
+
+            // Seek first, then read the position: the recorded offset must be exactly where the object is written.
+            output.Seek(0, SeekOrigin.End);
+            var newXref = XrefLocation.File(output.Position);
+
+            newPdfObjects[newObjRef] = newXref;
+            TokenWriter.Instance.WriteToken(new ObjectToken(newXref, newObjRef, updatedToken), output);
+        }
+
+        // Bind input content to the last output page
+        var lastPageRef = FindLastPage(outputPdf);
+
+        if (ResolveIndirect(outputPdf, lastPageRef) is not DictionaryToken outputPage)
+        {
+            throw new ArgumentException("Last page of the output document is not a dictionary");
+        }
+
+        if (!pageObj.TryGet<IndirectReferenceToken>(NameToken.Contents, out var contentObj))
+        {
+            throw new ArgumentException("Input page has no indirect Contents reference");
+        }
+
+        // Assume we have resources needed for content to render
+        if (!pageObj.TryGet<IndirectReferenceToken>(NameToken.Resources, out var resources))
+        {
+            throw new ArgumentException("Invalid page object");
+        }
+
+        // Both references must point at objects that were copied above, otherwise there is nothing to bind to.
+        if (!refMap.TryGetValue(contentObj.Data, out var newContentsRef))
+        {
+            throw new ArgumentException("Input page Contents does not reference a copied object");
+        }
+
+        if (!refMap.TryGetValue(resources.Data, out var newResourcesRef))
+        {
+            throw new ArgumentException("Input page Resources does not reference a copied object");
+        }
+
+        // Reading through outputPdf moved the stream, so reposition before recording the offset.
+        output.Seek(0, SeekOrigin.End);
+        var xrefLocation = XrefLocation.File(output.Position);
+        var newPageObject = outputPage
+            .With(NameToken.Contents, new IndirectReferenceToken(newContentsRef))
+            .With(NameToken.Resources, new IndirectReferenceToken(newResourcesRef));
+        TokenWriter.Instance.WriteToken(new ObjectToken(xrefLocation, lastPageRef.Data, newPageObject), output);
+
+        newPdfObjects[lastPageRef.Data] = xrefLocation;
+
+        // Write new xref table, linked to the previous one through its offset
+        TokenWriter.Instance.WriteCrossReferenceTable(
+            newPdfObjects.ToDictionary(kvp => kvp.Key, kvp => kvp.Value.Value1),
+            outputPdf.Structure.Trailer.Root,
+            output,
+            null,
+            outputPdf.Structure.XrefOffset);
+    }
+
+    ///
+    /// Recursively replaces IndirectReferenceToken in the token tree according to the map.
+    ///
+    /// <param name="token">Root of the token tree to rewrite.</param>
+    /// <param name="mapping">Old reference to new reference; references not in the map are kept as they are.</param>
+    /// <returns>The rewritten token; tokens that are not references or containers are returned unchanged.</returns>
+    private static IToken ReplaceReferences(IToken token, Dictionary<IndirectReference, IndirectReference> mapping)
+    {
+        return token switch
+        {
+            IndirectReferenceToken irt => mapping.TryGetValue(irt.Data, out var newRef) ? new IndirectReferenceToken(newRef) : token,
+            DictionaryToken dict => ReplaceDictionary(dict, mapping),
+            ArrayToken arr => ReplaceArray(arr, mapping),
+            StreamToken stream => ReplaceStream(stream, mapping),
+            _ => token
+        };
+    }
+
+    /// <summary>
+    /// Builds a new dictionary token with the same keys whose values went through <see cref="ReplaceReferences"/>.
+    /// </summary>
+    private static DictionaryToken ReplaceDictionary(DictionaryToken original, Dictionary<IndirectReference, IndirectReference> mapping)
+    {
+        var newDict = new Dictionary<NameToken, IToken>(original.Data.Count);
+        foreach (var kvp in original.Data)
+        {
+            // Data is keyed by string, the public constructor expects NameToken keys.
+            newDict[NameToken.Create(kvp.Key)] = ReplaceReferences(kvp.Value, mapping);
+        }
+
+        return new DictionaryToken(newDict);
+    }
+
+    /// <summary>
+    /// Builds a new array token of the same length whose elements went through <see cref="ReplaceReferences"/>.
+    /// </summary>
+    private static ArrayToken ReplaceArray(ArrayToken original, Dictionary<IndirectReference, IndirectReference> mapping)
+    {
+        var newData = new IToken[original.Length];
+        for (var i = 0; i < original.Length; i++)
+        {
+            newData[i] = ReplaceReferences(original.Data[i], mapping);
+        }
+
+        return new ArrayToken(newData);
+    }
+
+    /// <summary>
+    /// Rewrites the references inside a stream's dictionary; the stream bytes are passed through untouched.
+    /// </summary>
+    private static StreamToken ReplaceStream(StreamToken original, Dictionary<IndirectReference, IndirectReference> mapping)
+    {
+        var updatedDict = ReplaceDictionary(original.StreamDictionary, mapping);
+        // We create a new StreamToken with the replaced dictionary, preserving the original byte stream.
+        return new StreamToken(updatedDict, original.Data);
+    }
+
+    private const int MaxIndirectResolutionDepth = 32;
+
+    // Upper bound on page tree levels walked by FindLastPage; guards against cyclic Kids arrays.
+    private const int MaxPageTreeDepth = 256;
+
+    /// <summary>
+    /// Finds the reference of the last leaf page, starting from the Pages entry of the catalog.
+    /// </summary>
+    /// <exception cref="ArgumentException">The page tree is missing, malformed or has no leaf.</exception>
+    private static IndirectReferenceToken FindLastPage(PdfDocument pdf)
+    {
+        if (!pdf.Structure.Catalog.CatalogDictionary.TryGet(NameToken.Pages, out var pagesRef))
+        {
+            throw new ArgumentException("No pages were found in the input document.");
+        }
+
+        if (ResolveIndirect(pdf, pagesRef) is not DictionaryToken pages)
+        {
+            throw new ArgumentException("No pages were found in the input file.");
+        }
+
+        // Kids may itself be stored as an indirect object, so resolve it before the type check.
+        if (!pages.TryGet(NameToken.Kids, out var kidsToken)
+            || ResolveIndirect(pdf, kidsToken) is not ArrayToken kids)
+        {
+            throw new ArgumentException("No pages were found in the input document.");
+        }
+
+        return FindLastPage(pdf, kids);
+    }
+
+    /// <summary>
+    /// Walks down the last child of each page tree level until a node of type Page is reached.
+    /// </summary>
+    /// <param name="pdf">Document used to resolve indirect references.</param>
+    /// <param name="pageTree">Kids array of the level to start from.</param>
+    /// <returns>The indirect reference token that points at the last page.</returns>
+    /// <exception cref="ArgumentException">A node is malformed, the last page is a direct object, or the tree is cyclic or too deep.</exception>
+    private static IndirectReferenceToken FindLastPage(PdfDocument pdf, ArrayToken pageTree)
+    {
+        for (var depth = 0; depth < MaxPageTreeDepth; depth++)
+        {
+            if (pageTree.Length == 0)
+            {
+                throw new ArgumentException("No leaf in page tree");
+            }
+
+            var last = pageTree.Data[pageTree.Length - 1];
+            if (ResolveIndirect(pdf, last) is not DictionaryToken node)
+            {
+                throw new ArgumentException("Page tree node is not a dictionary");
+            }
+
+            if (!node.TryGet(NameToken.Type, out var typeToken)
+                || ResolveIndirect(pdf, typeToken) is not NameToken type)
+            {
+                throw new ArgumentException("Page tree node has no Type name");
+            }
+
+            if (type.Data == NameToken.Page.Data)
+            {
+                // The caller rewrites the page under its object number, so a direct page cannot be used.
+                return last as IndirectReferenceToken
+                       ?? throw new ArgumentException("Last page is not an indirect object");
+            }
+
+            if (!node.TryGet(NameToken.Kids, out var kidsToken)
+                || ResolveIndirect(pdf, kidsToken) is not ArrayToken kids)
+            {
+                throw new ArgumentException("Page tree node has no Kids array");
+            }
+
+            pageTree = kids;
+        }
+
+        throw new ArgumentException("Cyclic or excessively deep page tree");
+    }
+
+    /// <summary>
+    /// Resolves the object stored under <paramref name="reference"/>, following chained references.
+    /// </summary>
+    private static IToken ResolveIndirect(PdfDocument doc, IndirectReference reference)
+    {
+        return ResolveIndirect(doc, new IndirectReferenceToken(reference));
+    }
+
+    /// <summary>
+    /// Follows indirect references until a direct token is reached; a token that is not a reference
+    /// is returned as it is.
+    /// </summary>
+    /// <exception cref="ArgumentException">
+    /// More than <see cref="MaxIndirectResolutionDepth"/> hops were needed, or a referenced object has no data.
+    /// </exception>
+    private static IToken ResolveIndirect(PdfDocument doc, IToken token)
+    {
+        var depth = 0;
+        while (token is IndirectReferenceToken ir)
+        {
+            if (++depth > MaxIndirectResolutionDepth)
+            {
+                throw new ArgumentException(
+                    "Cyclic or excessively deep indirect reference chain in PDF document.");
+            }
+
+            var obj = doc.Structure.GetObject(ir.Data);
+            token = obj?.Data ?? throw new ArgumentException($"Failed to resolve indirect object {ir.Data}.");
+        }
+
+        return token;
+    }
+}
\ No newline at end of file
diff --git a/examples/Program.cs b/examples/Program.cs
index 108128d6f..e2fceede1 100644
--- a/examples/Program.cs
+++ b/examples/Program.cs
@@ -47,12 +47,24 @@ public static void Main()
                     ("Advance text extraction using layout analysis algorithms",
                         () => AdvancedTextExtraction.Run(Path.Combine(filesDirectory, "ICML03-081.pdf")))
                 },
-                {
-                    8,
+                {8,
                     ("Extract Words with newline detection (example with algorithm). Issue 512",
                         () => OpenDocumentAndExtractWords.Run(Path.Combine(filesDirectory, "OPEN.RABBIT.ENGLISH.LOP.pdf")))
-                }
-            };
+                } ,
+                {9,
+                    ("Advanced pdf merge, using low level pdf tools, like trailer dictionary and xref table",
+                        () =>
+                        {
+                            using var output = new FileStream("AdvancedMergeResult.pdf", FileMode.Create);
+                            using var input2 = File.Open(Path.Combine(filesDirectory, "EmptyPdf.pdf"), FileMode.Open);
+                            using var input = File.Open(Path.Combine(filesDirectory, "Various Content Types.pdf"), FileMode.Open);
+
+                            input2.CopyTo(output);
+                            output.Seek(0, SeekOrigin.Begin);
+                            AdvancedMerge.Run(input, output);
+                        })
+                }
+            };
 
             var choices = string.Join(Environment.NewLine, examples.Select(x => $"{x.Key}: {x.Value.name}"));
 
diff --git a/src/UglyToad.PdfPig.Tests/Integration/AdvancedMergeTests.cs b/src/UglyToad.PdfPig.Tests/Integration/AdvancedMergeTests.cs
new file mode 100644
index 000000000..dfaf9b035
--- /dev/null
+++ b/src/UglyToad.PdfPig.Tests/Integration/AdvancedMergeTests.cs
@@ -0,0 +1,248 @@
+namespace UglyToad.PdfPig.Tests.Integration;
+
+using PdfPig.Core;
+using PdfPig.Tokens;
+using PdfPig.Writer;
+
+public class AdvanceMergeTests
+{
+    [Fact]
+    public void TestAdvanceMerge()
+    {
+        // OpenRead: the fixture is only read; File.Open(path, FileMode.Open) would request read-write access with no sharing.
+        using var inputFile = File.OpenRead(IntegrationHelpers.GetDocumentPath("Various Content Types.pdf"));
+        using var input = new MemoryStream();
+        inputFile.CopyTo(input);
+        input.Seek(0, SeekOrigin.Begin);
+
+        using var outputFile =
File.OpenRead(IntegrationHelpers.GetDocumentPath("EmptyPdf.pdf"));
+        using var output = new MemoryStream();
+        outputFile.CopyTo(output);
+        output.Seek(0, SeekOrigin.Begin);
+
+        // Merge returns the very stream passed as output, which is already owned by the using above.
+        var result = Merge(input, output);
+        result.Seek(0, SeekOrigin.Begin);
+        using var outputPdf = PdfDocument.Open(result);
+
+        Assert.Equal(1, outputPdf.NumberOfPages);
+        Assert.Equal(2, outputPdf.Structure.CrossReferenceTable.Parts.Count); // since we did incremental update, there are 2 xrefs
+        Assert.True(outputPdf.Structure.CrossReferenceTable.ObjectOffsets.Count > 3); // we add more objects into empty pdf (has 3 objects)
+    }
+
+    /// <summary>
+    /// Appends the content of the single-page PDF in <paramref name="input"/> to the PDF already stored in
+    /// <paramref name="output"/> as an incremental update: copies every input object except catalog, page
+    /// tree root and page under a fresh object number, rewrites the last output page so its Contents and
+    /// Resources point at the copies, and writes a new cross-reference table chained to the previous one.
+    /// </summary>
+    /// <returns>The same stream instance that was passed as <paramref name="output"/>.</returns>
+    /// <exception cref="ArgumentException">A document does not have the structure this helper expects.</exception>
+    private static Stream Merge(Stream input, Stream output)
+    {
+        using var pdf = PdfDocument.Open(input);
+
+        if (!pdf.Structure.Catalog.CatalogDictionary.TryGet<IndirectReferenceToken>(NameToken.Pages, out var pages))
+        {
+            throw new ArgumentException("No pages reference were found");
+        }
+
+        if (ResolveIndirect(pdf, pages) is not DictionaryToken pagesObj)
+        {
+            throw new ArgumentException("No pages object were found");
+        }
+
+        // Assume, we have only 1 page in here
+        if (!pagesObj.TryGet(NameToken.Kids, out ArrayToken kids) || kids.Length != 1)
+        {
+            throw new ArgumentException("Input page tree must contain exactly one page");
+        }
+
+        if (kids.Data[0] is not IndirectReferenceToken kidReference
+            || ResolveIndirect(pdf, kidReference) is not DictionaryToken pageObj)
+        {
+            throw new ArgumentException("Input page must be an indirect reference to a page dictionary");
+        }
+
+        // Skip all pdf meta structure objects
+        var skippedRefs = new HashSet<IndirectReference>
+        {
+            pages.Data, // Pages
+            kidReference.Data, // Page
+            pdf.Structure.Trailer.Root, // Catalog
+        };
+
+        // Skip all refs from "skippedRefs" and order it by object number
+        var oldRefs = pdf.Structure.CrossReferenceTable.ObjectOffsets.Keys
+            .Where(k => !skippedRefs.Contains(k))
+            .OrderBy(k => k.ObjectNumber)
+            .ToList();
+
+        using var outputPdf = PdfDocument.Open(output);
+
+        // Building refs map, to rebind old objects to their new values.
+        // The map must be complete before any object is rewritten, because objects may reference
+        // other objects with a higher number.
+        var refMap = new Dictionary<IndirectReference, IndirectReference>();
+        var currentObjectNumber = outputPdf.Structure.Trailer.Size;
+        foreach (var oldRef in oldRefs)
+        {
+            refMap[oldRef] = new IndirectReference(currentObjectNumber++, 0);
+        }
+
+        output.Seek(0, SeekOrigin.End);
+        output.WriteByte((byte)'\n'); // without endline pdf wouldn't render in some readers
+
+        var newPdfObjects = new Dictionary<IndirectReference, XrefLocation>();
+
+        foreach (var oldRef in oldRefs)
+        {
+            var newObjRef = refMap[oldRef];
+            var token = ResolveIndirect(pdf, oldRef);
+            var updatedToken = ReplaceReferences(token, refMap);
+
+            // Seek first, then read the position: the recorded offset must be exactly where the object is written.
+            output.Seek(0, SeekOrigin.End);
+            var newXref = XrefLocation.File(output.Position);
+
+            newPdfObjects[newObjRef] = newXref;
+            TokenWriter.Instance.WriteToken(new ObjectToken(newXref, newObjRef, updatedToken), output);
+        }
+
+        // Bind input content to the last output page
+        var lastPageRef = FindLastPage(outputPdf);
+
+        if (ResolveIndirect(outputPdf, lastPageRef) is not DictionaryToken outputPage)
+        {
+            throw new ArgumentException("Last page of the output document is not a dictionary");
+        }
+
+        if (!pageObj.TryGet<IndirectReferenceToken>(NameToken.Contents, out var contentObj))
+        {
+            throw new ArgumentException("Input page has no indirect Contents reference");
+        }
+
+        // Assume we have resources needed for content to render
+        if (!pageObj.TryGet<IndirectReferenceToken>(NameToken.Resources, out var resources))
+        {
+            throw new ArgumentException("Invalid page object");
+        }
+
+        // Both references must point at objects that were copied above, otherwise there is nothing to bind to.
+        if (!refMap.TryGetValue(contentObj.Data, out var newContentsRef))
+        {
+            throw new ArgumentException("Input page Contents does not reference a copied object");
+        }
+
+        if (!refMap.TryGetValue(resources.Data, out var newResourcesRef))
+        {
+            throw new ArgumentException("Input page Resources does not reference a copied object");
+        }
+
+        // Reading through outputPdf moved the stream, so reposition before recording the offset.
+        output.Seek(0, SeekOrigin.End);
+        var xrefLocation = XrefLocation.File(output.Position);
+        var newPageObject = outputPage
+            .With(NameToken.Contents, new IndirectReferenceToken(newContentsRef))
+            .With(NameToken.Resources, new IndirectReferenceToken(newResourcesRef));
+        TokenWriter.Instance.WriteToken(new ObjectToken(xrefLocation, lastPageRef.Data, newPageObject), output);
+
+        newPdfObjects[lastPageRef.Data] = xrefLocation;
+
+        // Write new xref table, linked to the previous one through its offset
+        TokenWriter.Instance.WriteCrossReferenceTable(
+            newPdfObjects.ToDictionary(kvp => kvp.Key, kvp => kvp.Value.Value1),
+            outputPdf.Structure.Trailer.Root,
+            output,
+            null,
+            outputPdf.Structure.XrefOffset);
+        return output;
+    }
+
+    /// <summary>
+    /// Recursively replaces indirect references in the token tree according to the map;
+    /// references not in the map and all other tokens are returned unchanged.
+    /// </summary>
+    private static IToken ReplaceReferences(IToken token, Dictionary<IndirectReference, IndirectReference> mapping)
+    {
+        return token switch
+        {
+            IndirectReferenceToken irt => mapping.TryGetValue(irt.Data, out var newRef) ? new IndirectReferenceToken(newRef) : token,
+            DictionaryToken dict => ReplaceDictionary(dict, mapping),
+            ArrayToken arr => ReplaceArray(arr, mapping),
+            StreamToken stream => ReplaceStream(stream, mapping),
+            _ => token
+        };
+    }
+
+    /// <summary>
+    /// Builds a new dictionary token with the same keys whose values went through <see cref="ReplaceReferences"/>.
+    /// </summary>
+    private static DictionaryToken ReplaceDictionary(DictionaryToken original, Dictionary<IndirectReference, IndirectReference> mapping)
+    {
+        var newDict = new Dictionary<NameToken, IToken>(original.Data.Count);
+        foreach (var kvp in original.Data)
+        {
+            // Data is keyed by string, the public constructor expects NameToken keys.
+            newDict[NameToken.Create(kvp.Key)] = ReplaceReferences(kvp.Value, mapping);
+        }
+
+        return new DictionaryToken(newDict);
+    }
+
+    /// <summary>
+    /// Builds a new array token of the same length whose elements went through <see cref="ReplaceReferences"/>.
+    /// </summary>
+    private static ArrayToken ReplaceArray(ArrayToken original, Dictionary<IndirectReference, IndirectReference> mapping)
+    {
+        var newData = new IToken[original.Length];
+        for (var i = 0; i < original.Length; i++)
+        {
+            newData[i] = ReplaceReferences(original.Data[i], mapping);
+        }
+
+        return new ArrayToken(newData);
+    }
+
+    /// <summary>
+    /// Rewrites the references inside a stream's dictionary; the stream bytes are passed through untouched.
+    /// </summary>
+    private static StreamToken ReplaceStream(StreamToken original, Dictionary<IndirectReference, IndirectReference> mapping)
+    {
+        var updatedDict = ReplaceDictionary(original.StreamDictionary, mapping);
+        // We create a new StreamToken with the replaced dictionary, preserving the original byte stream.
+        return new StreamToken(updatedDict, original.Data);
+    }
+
+    private const int MaxIndirectResolutionDepth = 32;
+
+    // Upper bound on page tree levels walked by FindLastPage; guards against cyclic Kids arrays.
+    private const int MaxPageTreeDepth = 256;
+
+    /// <summary>
+    /// Finds the reference of the last leaf page, starting from the Pages entry of the catalog.
+    /// </summary>
+    /// <exception cref="ArgumentException">The page tree is missing, malformed or has no leaf.</exception>
+    private static IndirectReferenceToken FindLastPage(PdfDocument pdf)
+    {
+        if (!pdf.Structure.Catalog.CatalogDictionary.TryGet(NameToken.Pages, out var pagesRef))
+        {
+            throw new ArgumentException("No pages were found in the input document.");
+        }
+
+        if (ResolveIndirect(pdf, pagesRef) is not DictionaryToken pages)
+        {
+            throw new ArgumentException("No pages were found in the input file.");
+        }
+
+        // Kids may itself be stored as an indirect object, so resolve it before the type check.
+        if (!pages.TryGet(NameToken.Kids, out var kidsToken)
+            || ResolveIndirect(pdf, kidsToken) is not ArrayToken kids)
+        {
+            throw new ArgumentException("No pages were found in the input document.");
+        }
+
+        return FindLastPage(pdf, kids);
+    }
+
+    /// <summary>
+    /// Walks down the last child of each page tree level until a node of type Page is reached.
+    /// </summary>
+    /// <param name="pdf">Document used to resolve indirect references.</param>
+    /// <param name="pageTree">Kids array of the level to start from.</param>
+    /// <returns>The indirect reference token that points at the last page.</returns>
+    /// <exception cref="ArgumentException">A node is malformed, the last page is a direct object, or the tree is cyclic or too deep.</exception>
+    private static IndirectReferenceToken FindLastPage(PdfDocument pdf, ArrayToken pageTree)
+    {
+        for (var depth = 0; depth < MaxPageTreeDepth; depth++)
+        {
+            if (pageTree.Length == 0)
+            {
+                throw new ArgumentException("No leaf in page tree");
+            }
+
+            var last = pageTree.Data[pageTree.Length - 1];
+            if (ResolveIndirect(pdf, last) is not DictionaryToken node)
+            {
+                throw new ArgumentException("Page tree node is not a dictionary");
+            }
+
+            if (!node.TryGet(NameToken.Type, out var typeToken)
+                || ResolveIndirect(pdf, typeToken) is not NameToken type)
+            {
+                throw new ArgumentException("Page tree node has no Type name");
+            }
+
+            if (type.Data == NameToken.Page.Data)
+            {
+                // The caller rewrites the page under its object number, so a direct page cannot be used.
+                return last as IndirectReferenceToken
+                       ?? throw new ArgumentException("Last page is not an indirect object");
+            }
+
+            if (!node.TryGet(NameToken.Kids, out var kidsToken)
+                || ResolveIndirect(pdf, kidsToken) is not ArrayToken kids)
+            {
+                throw new ArgumentException("Page tree node has no Kids array");
+            }
+
+            pageTree = kids;
+        }
+
+        throw new ArgumentException("Cyclic or excessively deep page tree");
+    }
+
+    /// <summary>
+    /// Resolves the object stored under <paramref name="reference"/>, following chained references.
+    /// </summary>
+    private static IToken ResolveIndirect(PdfDocument doc, IndirectReference reference)
+    {
+        return ResolveIndirect(doc, new IndirectReferenceToken(reference));
+    }
+
+    /// <summary>
+    /// Follows indirect references until a direct token is reached; a token that is not a reference
+    /// is returned as it is.
+    /// </summary>
+    /// <exception cref="ArgumentException">
+    /// More than <see cref="MaxIndirectResolutionDepth"/> hops were needed, or a referenced object has no data.
+    /// </exception>
+    private static IToken ResolveIndirect(PdfDocument doc, IToken token)
+    {
+        var depth = 0;
+        while (token is IndirectReferenceToken ir)
+        {
+            if (++depth > MaxIndirectResolutionDepth)
+            {
+                throw new ArgumentException(
+                    "Cyclic or excessively deep indirect reference chain in PDF document.");
+            }
+
+            var obj = doc.Structure.GetObject(ir.Data);
+            token = obj?.Data ?? throw new ArgumentException($"Failed to resolve indirect object {ir.Data}.");
+        }
+
+        return token;
+    }
+}
\ No newline at end of file
diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/EmptyPdf.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/EmptyPdf.pdf
new file mode 100644
index 000000000..f4bf70691
Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/EmptyPdf.pdf differ
diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/capas.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/capas.pdf
index a57167ca5..7de7b2255 100644
Binary files a/src/UglyToad.PdfPig.Tests/Integration/Documents/capas.pdf and b/src/UglyToad.PdfPig.Tests/Integration/Documents/capas.pdf differ
diff --git a/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs b/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs
index 2675fdbe8..60c45b747 100644
--- a/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs
@@ -92,13 +92,13 @@ public void CanTokenizeAllAccessibleObjects(string documentName)
             {
                 Assert.NotNull(document.Structure.Catalog);
 
-                //Assert.True(document.Structure.CrossReferenceTable.ObjectOffsets.Count > 0, "Cross reference table was empty.");
-                //foreach (var objectOffset in document.Structure.CrossReferenceTable.ObjectOffsets)
-                //{
-                //    var token = document.Structure.GetObject(objectOffset.Key);
+                Assert.True(document.Structure.CrossReferenceTable.ObjectOffsets.Count > 0, "Cross reference table was empty.");
+                foreach (var objectOffset in document.Structure.CrossReferenceTable.ObjectOffsets)
+                {
+                    var token = document.Structure.GetObject(objectOffset.Key);
 
-                //    Assert.NotNull(token);
-                //}
+                    Assert.NotNull(token);
+                }
             }
         }
 
diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs
index 292c1b25e..e32f55cc4 100644
--- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs
+++
b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs @@ -96,6 +96,7 @@ public void OnlyExposedApiIsPublic() "UglyToad.PdfPig.Content.TextOrientation", "UglyToad.PdfPig.Content.XmpMetadata", "UglyToad.PdfPig.CrossReference.CrossReferenceTable", + "UglyToad.PdfPig.CrossReference.CrossReferenceTablePart", "UglyToad.PdfPig.CrossReference.CrossReferenceType", "UglyToad.PdfPig.CrossReference.TrailerDictionary", "UglyToad.PdfPig.Exceptions.PdfDocumentEncryptedException", diff --git a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs index 82618b5a6..0f4bb30fa 100644 --- a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs +++ b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs @@ -174,12 +174,12 @@ public void CanReadSingleBlankPage() Assert.NotNull(document.Structure.Catalog); - //foreach (var offset in document.Structure.CrossReferenceTable.ObjectOffsets) - //{ - // var obj = document.Structure.GetObject(offset.Key); - - // Assert.NotNull(obj); - //} + foreach (var offset in document.Structure.CrossReferenceTable.ObjectOffsets) + { + var obj = document.Structure.GetObject(offset.Key); + + Assert.NotNull(obj); + } } } @@ -988,8 +988,8 @@ public void CanDedupObjectsFromSameDoc_Builder() using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff)) { Assert.Equal(2, document.NumberOfPages); - // Assert.True(document.Structure.CrossReferenceTable.ObjectOffsets.Count <= 29, - // "Expected object count to be lower than 30"); // 45 objects with duplicates, 29 with correct re-use + Assert.True(document.Structure.CrossReferenceTable.ObjectOffsets.Count <= 29, + "Expected object count to be lower than 30"); // 45 objects with duplicates, 29 with correct re-use } } } @@ -1010,8 +1010,8 @@ public void CanDedupObjectsFromDifferentDoc_HashBuilder() using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff)) { Assert.Equal(2, document.NumberOfPages); - 
// Assert.True(document.Structure.CrossReferenceTable.ObjectOffsets.Count <= 29, - // "Expected object count to be lower than 30"); // 45 objects with duplicates, 29 with correct re-use + Assert.True(document.Structure.CrossReferenceTable.ObjectOffsets.Count <= 29, + "Expected object count to be lower than 30"); // 45 objects with duplicates, 29 with correct re-use } } } @@ -1404,10 +1404,12 @@ public void WriteObject(long objectNumber, int generation, byte[] data, Stream o Objects++; } - public void WriteCrossReferenceTable(IReadOnlyDictionary objectOffsets, + public void WriteCrossReferenceTable( + IReadOnlyDictionary objectOffsets, IndirectReference catalogToken, Stream outputStream, - IndirectReference? documentInformationReference) + IndirectReference? documentInformationReference, + long? prevTableLocation) { WroteCrossReferenceTable = true; } diff --git a/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs b/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs index 32d9f5645..681f5ae72 100644 --- a/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs +++ b/src/UglyToad.PdfPig.Tests/Writer/PdfMergerTests.cs @@ -96,8 +96,8 @@ public void ObjectCountLower() using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff)) { Assert.Equal(2, document.NumberOfPages); - // Assert.True(document.Structure.CrossReferenceTable.ObjectOffsets.Count <= 24, - // "Expected object count to be lower than 24"); + Assert.True(document.Structure.CrossReferenceTable.ObjectOffsets.Count <= 24, + "Expected object count to be lower than 24"); } } @@ -111,8 +111,8 @@ public void DedupsObjectsFromSameDoc() using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff)) { Assert.Equal(2, document.NumberOfPages); - // Assert.True(document.Structure.CrossReferenceTable.ObjectOffsets.Count <= 29, - // "Expected object count to be lower than 30"); // 45 objects with duplicates, 29 with correct re-use + 
Assert.True(document.Structure.CrossReferenceTable.ObjectOffsets.Count <= 29, + "Expected object count to be lower than 30"); // 45 objects with duplicates, 29 with correct re-use } } diff --git a/src/UglyToad.PdfPig.Tokens/DictionaryToken.cs b/src/UglyToad.PdfPig.Tokens/DictionaryToken.cs index 99b217316..1beea7295 100644 --- a/src/UglyToad.PdfPig.Tokens/DictionaryToken.cs +++ b/src/UglyToad.PdfPig.Tokens/DictionaryToken.cs @@ -1,171 +1,176 @@ -namespace UglyToad.PdfPig.Tokens -{ - using System; - using System.Collections.Generic; - using System.Linq; - - /// - /// A dictionary object is an associative table containing pairs of objects, known as the dictionary's entries. - /// The key must be a and the value may be an kind of . - /// - public class DictionaryToken : IDataToken>, IEquatable - { - /// - /// The key value pairs in this dictionary. - /// - public IReadOnlyDictionary Data { get; } - - /// - /// Create a new . - /// - /// The data this dictionary will contain. - public DictionaryToken(IReadOnlyDictionary data) - { - if (data == null) - { - throw new ArgumentNullException(nameof(data)); - } - - var result = new Dictionary(data.Count); - - foreach (var keyValuePair in data) - { - result[keyValuePair.Key.Data] = keyValuePair.Value; - } - - Data = result; - } - - private DictionaryToken(IReadOnlyDictionary data) - { - Data = data; - } - - /// - /// Try and get the entry with a given name. - /// - /// The name of the entry to retrieve. - /// The token, if it is found. - /// if the token is found, otherwise. - public bool TryGet(NameToken name, out IToken token) - { - if (name == null) - { - throw new ArgumentNullException(nameof(name)); - } - - return Data.TryGetValue(name.Data, out token); - } - - /// - /// Try and get the entry with a given name and a specific data type. - /// - /// The expected data type of the dictionary value. - /// The name of the entry to retrieve. - /// The token, if it is found. - /// if the token is found with this type, otherwise. 
- public bool TryGet(NameToken name, out T token) where T : IToken - { - token = default(T); - if (!TryGet(name, out var t) || !(t is T typedToken)) - { - return false; - } - - token = typedToken; - return true; - } - - /// - /// Whether the dictionary contains an entry with this name. - /// - /// The name to check. - /// if the token is found, otherwise. - public bool ContainsKey(NameToken name) - { - return Data.ContainsKey(name.Data); - } - - /// - /// Create a copy of this dictionary with the additional entry (or override the value of the existing entry). - /// - /// The key of the entry to create or override. - /// The value of the entry to create or override. - /// A new with the entry created or modified. - public DictionaryToken With(NameToken key, IToken value) => With(key.Data, value); - - /// - /// Create a copy of this dictionary with the additional entry (or override the value of the existing entry). - /// - /// The key of the entry to create or override. - /// The value of the entry to create or override. - /// A new with the entry created or modified. - public DictionaryToken With(string key, IToken value) - { - if (key == null) - { - throw new ArgumentNullException(nameof(key)); - } - - if (value == null) - { - throw new ArgumentNullException(nameof(value)); - } - - var result = new Dictionary(Data.Count + 1); - - foreach (var keyValuePair in Data) - { - result[keyValuePair.Key] = keyValuePair.Value; - } - - result[key] = value; - - return new DictionaryToken(result); - } - +namespace UglyToad.PdfPig.Tokens +{ + using System; + using System.Collections.Generic; + using System.Linq; + + /// + /// A dictionary object is an associative table containing pairs of objects, known as the dictionary's entries. + /// The key must be a and the value may be an kind of . + /// + public class DictionaryToken : IDataToken>, IEquatable + { + /// + /// The key value pairs in this dictionary. 
+ /// + public IReadOnlyDictionary Data { get; } + + /// + /// Empty DictionaryToken instance + /// + public static readonly DictionaryToken Empty = new(new Dictionary()); + + /// + /// Create a new . + /// + /// The data this dictionary will contain. + public DictionaryToken(IReadOnlyDictionary data) + { + if (data == null) + { + throw new ArgumentNullException(nameof(data)); + } + + var result = new Dictionary(data.Count); + + foreach (var keyValuePair in data) + { + result[keyValuePair.Key.Data] = keyValuePair.Value; + } + + Data = result; + } + + private DictionaryToken(IReadOnlyDictionary data) + { + Data = data; + } + + /// + /// Try and get the entry with a given name. + /// + /// The name of the entry to retrieve. + /// The token, if it is found. + /// if the token is found, otherwise. + public bool TryGet(NameToken name, out IToken token) + { + if (name == null) + { + throw new ArgumentNullException(nameof(name)); + } + + return Data.TryGetValue(name.Data, out token); + } + + /// + /// Try and get the entry with a given name and a specific data type. + /// + /// The expected data type of the dictionary value. + /// The name of the entry to retrieve. + /// The token, if it is found. + /// if the token is found with this type, otherwise. + public bool TryGet(NameToken name, out T token) where T : IToken + { + token = default(T); + if (!TryGet(name, out var t) || !(t is T typedToken)) + { + return false; + } + + token = typedToken; + return true; + } + + /// + /// Whether the dictionary contains an entry with this name. + /// + /// The name to check. + /// if the token is found, otherwise. + public bool ContainsKey(NameToken name) + { + return Data.ContainsKey(name.Data); + } + + /// + /// Create a copy of this dictionary with the additional entry (or override the value of the existing entry). + /// + /// The key of the entry to create or override. + /// The value of the entry to create or override. + /// A new with the entry created or modified. 
+ public DictionaryToken With(NameToken key, IToken value) => With(key.Data, value); + + /// + /// Create a copy of this dictionary with the additional entry (or override the value of the existing entry). + /// + /// The key of the entry to create or override. + /// The value of the entry to create or override. + /// A new with the entry created or modified. + public DictionaryToken With(string key, IToken value) + { + if (key == null) + { + throw new ArgumentNullException(nameof(key)); + } + + if (value == null) + { + throw new ArgumentNullException(nameof(value)); + } + + var result = new Dictionary(Data.Count + 1); + + foreach (var keyValuePair in Data) + { + result[keyValuePair.Key] = keyValuePair.Value; + } + + result[key] = value; + + return new DictionaryToken(result); + } + /// /// Creates a copy of this dictionary with the entry with the specified key removed (if it exists). /// /// The key of the entry to remove. - /// A new with the entry removed. - public DictionaryToken Without(NameToken key) => Without(key.Data); - + /// A new with the entry removed. + public DictionaryToken Without(NameToken key) => Without(key.Data); + /// /// Creates a copy of this dictionary with the entry with the specified key removed (if it exists). /// /// The key of the entry to remove. - /// A new with the entry removed. - public DictionaryToken Without(string key) - { - if (key == null) - { - throw new ArgumentNullException(nameof(key)); - } - - var result = new Dictionary(Data.ContainsKey(key) ? Data.Count - 1 : Data.Count); - - foreach (var keyValuePair in Data.Where(x => !x.Key.Equals(key))) - { - result[keyValuePair.Key] = keyValuePair.Value; - } - - return new DictionaryToken(result); - } - - /// - /// Create a new . - /// - /// The data this dictionary will contain. - public static DictionaryToken With(IReadOnlyDictionary data) - { - return new DictionaryToken(data ?? 
throw new ArgumentNullException(nameof(data))); - } - - /// - public bool Equals(IToken obj) - { - return Equals(obj as DictionaryToken); + /// A new with the entry removed. + public DictionaryToken Without(string key) + { + if (key == null) + { + throw new ArgumentNullException(nameof(key)); + } + + var result = new Dictionary(Data.ContainsKey(key) ? Data.Count - 1 : Data.Count); + + foreach (var keyValuePair in Data.Where(x => !x.Key.Equals(key))) + { + result[keyValuePair.Key] = keyValuePair.Value; + } + + return new DictionaryToken(result); + } + + /// + /// Create a new . + /// + /// The data this dictionary will contain. + public static DictionaryToken With(IReadOnlyDictionary data) + { + return new DictionaryToken(data ?? throw new ArgumentNullException(nameof(data))); + } + + /// + public bool Equals(IToken obj) + { + return Equals(obj as DictionaryToken); } /// @@ -176,32 +181,32 @@ public bool Equals(DictionaryToken other) return false; } - if (ReferenceEquals(this, other)) - { - return true; + if (ReferenceEquals(this, other)) + { + return true; + } + + if (Data.Count != other.Data.Count) + { + return false; + } + + foreach (var kvp in other.Data) + { + if (!Data.TryGetValue(kvp.Key, out var val) || !val.Equals(kvp.Value)) + { + return false; + } } - if (Data.Count != other.Data.Count) - { - return false; - } - - foreach (var kvp in other.Data) - { - if (!Data.TryGetValue(kvp.Key, out var val) || !val.Equals(kvp.Value)) - { - return false; - } - } - return true; - } - - /// - public override string ToString() - { - return string.Join(", ", Data.Select(x => $"<{x.Key}, {x.Value}>")); + } + + /// + public override string ToString() + { + return string.Join(", ", Data.Select(x => $"<{x.Key}, {x.Value}>")); } - } -} + } +} diff --git a/src/UglyToad.PdfPig.Tokens/NumericToken.cs b/src/UglyToad.PdfPig.Tokens/NumericToken.cs index 72461f3d2..733ac5ad8 100644 --- a/src/UglyToad.PdfPig.Tokens/NumericToken.cs +++ b/src/UglyToad.PdfPig.Tokens/NumericToken.cs @@ 
-166,6 +166,15 @@ public NumericToken(int value) { Data = value; } + + /// + /// Create a . + /// + /// The number to represent. + public NumericToken(long value) + { + Data = value; + } /// /// Create a . diff --git a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTable.cs b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTable.cs index 91be7106c..2db3870a9 100644 --- a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTable.cs +++ b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTable.cs @@ -3,6 +3,8 @@ using System; using System.Collections.Generic; using Core; + using Parser.FileStructure; + using System.Linq; /// /// The cross-reference table contains information that enables random access to PDF objects within the file by object number @@ -13,49 +15,64 @@ /// public class CrossReferenceTable { - private readonly Dictionary objectOffsets; + private readonly CrossReferenceTablePart[] parts; + private readonly Dictionary objectOffsets; /// /// The corresponding byte offset for each keyed object in this document. /// - public IReadOnlyDictionary ObjectOffsets => objectOffsets; + public IReadOnlyDictionary ObjectOffsets => objectOffsets; + + /// + /// List of all xref tables parts, added during all incremental updates + /// + public IReadOnlyList Parts => parts; /// /// The type of the first cross-reference table located in this document. /// + [Obsolete("Useless property. Use each CrossReferenceTablePart.Type instead.")] public CrossReferenceType Type { get; } /// /// The trailer dictionary. /// + [Obsolete("Use trailer dictionary from PdfDocument.Structure property instead")] public TrailerDictionary Trailer { get; } /// /// The byte offsets of each cross-reference table or stream in this document and the previous /// table or stream they link to if applicable. /// + [Obsolete("Useless property. 
Use each CrossReferenceTablePart.Offset/Prev instead")] public IReadOnlyList CrossReferenceOffsets { get; } - internal CrossReferenceTable(CrossReferenceType type, IReadOnlyDictionary objectOffsets, - TrailerDictionary trailer, - IReadOnlyList crossReferenceOffsets) + internal CrossReferenceTable( + IReadOnlyList sections, + IReadOnlyDictionary objectOffsets, + TrailerDictionary trailer) { if (objectOffsets is null) { throw new ArgumentNullException(nameof(objectOffsets)); } - Type = type; - Trailer = trailer ?? throw new ArgumentNullException(nameof(trailer)); - CrossReferenceOffsets = crossReferenceOffsets ?? throw new ArgumentNullException(nameof(crossReferenceOffsets)); - - var result = new Dictionary(capacity: objectOffsets.Count); - foreach (var objectOffset in objectOffsets) + if (sections is null) { - result[objectOffset.Key] = objectOffset.Value; + throw new ArgumentNullException(nameof(sections)); } + + parts = sections + .Select(CrossReferenceTablePart.FromXrefSection) + .ToArray(); - this.objectOffsets = result; + CrossReferenceOffsets = parts + .Select(p => new CrossReferenceOffset(p.Offset, p.Previous)) + .ToArray(); + + Type = parts.FirstOrDefault()?.Type ?? CrossReferenceType.Table; + Trailer = trailer ?? 
throw new ArgumentNullException(nameof(trailer)); + this.objectOffsets = objectOffsets.ToDictionary(kvp => kvp.Key, kvp => kvp.Value); } /// diff --git a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTableBuilder.cs b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTableBuilder.cs deleted file mode 100644 index 7f32fb66a..000000000 --- a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTableBuilder.cs +++ /dev/null @@ -1,130 +0,0 @@ -namespace UglyToad.PdfPig.CrossReference -{ - using System; - using System.Collections.Generic; - using System.Linq; - using Core; - using Logging; - using Tokens; - - /// - /// - /// - /// - /// The table contains a one-line entry for each indirect object, specifying the location of that object within the body of the file. - /// - internal class CrossReferenceTableBuilder - { - private readonly List parts = new List(); - public IReadOnlyList Parts => parts; - - public void Add(CrossReferenceTablePart part) - { - if (part is null) - { - throw new ArgumentNullException(nameof(part)); - } - - parts.Add(part); - } - - public CrossReferenceTable Build(long firstCrossReferenceOffset, long offsetCorrection, bool isLenientParsing, ILog log) - { - CrossReferenceType type = CrossReferenceType.Table; - DictionaryToken trailerDictionary = new DictionaryToken(new Dictionary()); - Dictionary objectOffsets = new Dictionary(); - - var xrefPartToBytePositionOrder = new List(); - - var currentPart = parts.FirstOrDefault(x => x.Offset == firstCrossReferenceOffset); - - if (currentPart is null) - { - // no XRef at given position - log.Warn($"Did not find an XRef object at the specified startxref position {firstCrossReferenceOffset}"); - - // use all objects in byte position order (last entries overwrite previous ones) - xrefPartToBytePositionOrder.AddRange(parts.Select(x => x.Offset)); - xrefPartToBytePositionOrder.Sort(); - } - else - { - // copy xref type - type = currentPart.Type; - - // found starting Xref object - // add this and follow chain 
defined by 'Prev' keys - xrefPartToBytePositionOrder.Add(firstCrossReferenceOffset); - - while (currentPart.Dictionary != null) - { - // Get any streams that are tied to this table. - var activePart = currentPart; - var dependents = parts.Where(x => x.TiedToXrefAtOffset == activePart.Offset); - foreach (var dependent in dependents) - { - xrefPartToBytePositionOrder.Add(dependent.Offset); - } - - long prevBytePos = currentPart.GetPreviousOffset(); - if (prevBytePos == -1) - { - break; - } - - currentPart = parts.FirstOrDefault(x => x.Offset == prevBytePos || x.Offset == prevBytePos + offsetCorrection); - if (currentPart is null) - { - log.Warn("Did not found XRef object pointed to by 'Prev' key at position " + prevBytePos); - break; - } - - xrefPartToBytePositionOrder.Add(prevBytePos); - - // sanity check to prevent infinite loops - if (xrefPartToBytePositionOrder.Count >= parts.Count) - { - break; - } - } - - // have to reverse order so that later XRefs will overwrite previous ones - xrefPartToBytePositionOrder.Reverse(); - } - - // merge used and sorted XRef/trailer - foreach (long bPos in xrefPartToBytePositionOrder) - { - var currentObject = parts.First(x => x.Offset == bPos || x.Offset == bPos + offsetCorrection); - if (currentObject.Dictionary != null) - { - foreach (var entry in currentObject.Dictionary.Data) - { - /* - * If we're at a second trailer, we have a linearized pdf file, meaning that the first Size entry represents - * all of the objects so we don't need to grab the second. 
- */ - if (!entry.Key.Equals("Size", StringComparison.OrdinalIgnoreCase) - || !trailerDictionary.ContainsKey(NameToken.Size)) - { - trailerDictionary = trailerDictionary.With(entry.Key, entry.Value); - } - } - } - - foreach (var item in currentObject.ObjectOffsets) - { - objectOffsets[item.Key] = item.Value; - } - } - - return new CrossReferenceTable(type, objectOffsets, new TrailerDictionary(trailerDictionary, isLenientParsing), - parts.Select(x => - { - var prev = x.GetPreviousOffset(); - - return new CrossReferenceTable.CrossReferenceOffset(x.Offset, prev >= 0 ? prev : default(long?)); - }).ToList()); - } - } -} diff --git a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePart.cs b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePart.cs index eb074671f..483e5db7e 100644 --- a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePart.cs +++ b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePart.cs @@ -2,6 +2,8 @@ { using System.Collections.Generic; using Core; + using Parser.FileStructure; + using System.Linq; using Tokens; /// @@ -21,15 +23,15 @@ /// giving the number of bytes from the beginning of the file to the beginning of the /// object. /// - internal class CrossReferenceTablePart + public class CrossReferenceTablePart { - public IReadOnlyDictionary ObjectOffsets { get; } + public IReadOnlyDictionary ObjectOffsets { get; } - public long Offset { get; private set; } + public long Offset { get; } - public long Previous { get; } + public long? Previous { get; } - public DictionaryToken Dictionary { get; private set; } + public DictionaryToken Dictionary { get; } public CrossReferenceType Type { get; } @@ -39,8 +41,9 @@ internal class CrossReferenceTablePart public long? TiedToXrefAtOffset { get; } public CrossReferenceTablePart( - IReadOnlyDictionary objectOffsets, - long offset, long previous, + IReadOnlyDictionary objectOffsets, + long offset, + long? previous, DictionaryToken dictionary, CrossReferenceType type, long? 
tiedToXrefAtOffset) @@ -53,20 +56,28 @@ public CrossReferenceTablePart( TiedToXrefAtOffset = tiedToXrefAtOffset; } - public void FixOffset(long offset) + internal static CrossReferenceTablePart FromXrefSection(IXrefSection xrefSection) { - Offset = offset; - Dictionary = Dictionary.With(NameToken.Prev, new NumericToken((double)offset)); - } + long? tiedToXrefAtOffset = null; - public long GetPreviousOffset() - { - if (Dictionary.TryGet(NameToken.Prev, out var token) && token is NumericToken numeric) + if (xrefSection.Dictionary is not null) { - return numeric.Long; + tiedToXrefAtOffset = xrefSection.Dictionary.TryGet(NameToken.XrefStm, out var xrefStm) + ? xrefStm.Long + : tiedToXrefAtOffset; } - return -1; + var partType = xrefSection is XrefStream + ? CrossReferenceType.Stream + : CrossReferenceType.Table; + + return new CrossReferenceTablePart( + xrefSection.ObjectOffsets, + xrefSection.Offset, + xrefSection.GetPrevious(), + xrefSection.Dictionary ?? DictionaryToken.Empty, + partType, + tiedToXrefAtOffset); } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePartBuilder.cs b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePartBuilder.cs deleted file mode 100644 index 9ff6039cc..000000000 --- a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePartBuilder.cs +++ /dev/null @@ -1,42 +0,0 @@ -namespace UglyToad.PdfPig.CrossReference -{ - using System.Collections.Generic; - using Core; - using Tokens; - - internal class CrossReferenceTablePartBuilder - { - private readonly Dictionary objects = new Dictionary(); - - public long Offset { get; set; } - - public long Previous { get; set; } - - public DictionaryToken? Dictionary { get; set; } - - public CrossReferenceType XRefType { get; set; } - - public long? 
TiedToPreviousAtOffset { get; set; } - - public void Add(long objectId, int generationNumber, long offset) - { - if (generationNumber > ushort.MaxValue) - { - // We skip invalid generation number - return; - } - - IndirectReference objKey = new IndirectReference(objectId, generationNumber); - - if (!objects.ContainsKey(objKey)) - { - objects[objKey] = offset; - } - } - - public CrossReferenceTablePart Build() - { - return new CrossReferenceTablePart(objects, Offset, Previous, Dictionary!, XRefType, TiedToPreviousAtOffset); - } - } -} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs index 9289e2bfb..0d2f6ded4 100644 --- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs @@ -165,11 +165,16 @@ private static PdfDocument OpenDocument( pdfScanner.UpdateEncryptionHandler(encryptionHandler); + var crossReferenceTable = new CrossReferenceTable( + initialParse.Parts, + initialParse.XrefOffsets, + trailer); + var cidFontFactory = new CidFontFactory( parsingOptions.Logger, pdfScanner, filterProvider); - + var encodingReader = new EncodingReader(pdfScanner); var cmapCache = new CMapLocalCache(filterProvider, pdfScanner); @@ -238,7 +243,9 @@ private static PdfDocument OpenDocument( filterProvider, acroFormFactory, bookmarksProvider, - parsingOptions); + parsingOptions, + crossReferenceTable, + trailer); } private static (IndirectReference, DictionaryToken) ParseTrailer( diff --git a/src/UglyToad.PdfPig/PdfDocument.cs b/src/UglyToad.PdfPig/PdfDocument.cs index 5077410ce..98bdf137e 100644 --- a/src/UglyToad.PdfPig/PdfDocument.cs +++ b/src/UglyToad.PdfPig/PdfDocument.cs @@ -7,6 +7,7 @@ using AcroForms; using Content; using Core; + using CrossReference; using Encryption; using Exceptions; using Filters; @@ -76,7 +77,9 @@ internal PdfDocument( ILookupFilterProvider filterProvider, AcroFormFactory acroFormFactory, BookmarksProvider 
bookmarksProvider, - ParsingOptions parsingOptions) + ParsingOptions parsingOptions, + CrossReferenceTable crossReferenceTable, + TrailerDictionary trailer) { this.inputBytes = inputBytes; this.version = version ?? throw new ArgumentNullException(nameof(version)); @@ -89,7 +92,7 @@ internal PdfDocument( Information = information ?? throw new ArgumentNullException(nameof(information)); pages = catalog.Pages; namedDestinations = catalog.NamedDestinations; - Structure = new Structure(catalog, pdfScanner); + Structure = new Structure(catalog, pdfScanner, trailer, crossReferenceTable); Advanced = new AdvancedPdfDocumentAccess(pdfScanner, filterProvider, catalog); documentForm = new Lazy(() => acroFormFactory.GetAcroForm(catalog)!); } diff --git a/src/UglyToad.PdfPig/Structure.cs b/src/UglyToad.PdfPig/Structure.cs index 4542e7ca4..2b1dd8f65 100644 --- a/src/UglyToad.PdfPig/Structure.cs +++ b/src/UglyToad.PdfPig/Structure.cs @@ -3,6 +3,8 @@ using System; using Content; using Core; + using CrossReference; + using System.Linq; using Tokenization.Scanner; using Tokens; @@ -15,6 +17,21 @@ public class Structure /// The root of the document's hierarchy providing access to the page tree as well as other information. /// public Catalog Catalog { get; } + + /// + /// The xref table of the document. Contains objects from all parsed xref tables. + /// + public CrossReferenceTable CrossReferenceTable { get; } + + /// + /// The trailer dictionary of the document. Contains most bottom trailer + /// + public TrailerDictionary Trailer { get; } + + /// + /// The offset of the xref table/object stream + /// + public long XrefOffset { get; } /// /// Provides access to tokenization capabilities for objects by object number. @@ -23,10 +40,15 @@ public class Structure internal Structure( Catalog catalog, - IPdfTokenScanner scanner) + IPdfTokenScanner scanner, + TrailerDictionary trailer, + CrossReferenceTable xrefTable) { + Trailer = trailer ?? 
throw new ArgumentNullException(nameof(trailer)); Catalog = catalog ?? throw new ArgumentNullException(nameof(catalog)); TokenScanner = scanner ?? throw new ArgumentNullException(nameof(scanner)); + CrossReferenceTable = xrefTable ?? throw new ArgumentNullException(nameof(xrefTable)); + XrefOffset = CrossReferenceTable.Parts.Count > 0 ? CrossReferenceTable.Parts.Last().Offset : 0; } /// diff --git a/src/UglyToad.PdfPig/Writer/ITokenWriter.cs b/src/UglyToad.PdfPig/Writer/ITokenWriter.cs index 4b9f449fc..bb7f0abb6 100644 --- a/src/UglyToad.PdfPig/Writer/ITokenWriter.cs +++ b/src/UglyToad.PdfPig/Writer/ITokenWriter.cs @@ -33,10 +33,13 @@ public interface ITokenWriter /// The object representing the catalog dictionary which is referenced from the trailer dictionary. /// The output stream to write to. /// The object reference for the document information dictionary if present. + /// The offset to the previous xref table if present void WriteCrossReferenceTable( IReadOnlyDictionary objectOffsets, - IndirectReference catalogToken, Stream outputStream, - IndirectReference? documentInformationReference); + IndirectReference catalogToken, + Stream outputStream, + IndirectReference? documentInformationReference, + long? prevTableLocation); /// /// Hints to the token writer that we are currently writing page contents. diff --git a/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs b/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs index cd912ba9a..68b071201 100644 --- a/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs +++ b/src/UglyToad.PdfPig/Writer/PdfStreamWriter.cs @@ -95,7 +95,7 @@ public void InitializePdf(double version) public void CompletePdf(IndirectReferenceToken catalogReference, IndirectReferenceToken? 
documentInformationReference = null) { - TokenWriter.WriteCrossReferenceTable(offsets, catalogReference.Data, Stream, documentInformationReference?.Data); + TokenWriter.WriteCrossReferenceTable(offsets, catalogReference.Data, Stream, documentInformationReference?.Data, null); } public void Dispose() diff --git a/src/UglyToad.PdfPig/Writer/TokenWriter.cs b/src/UglyToad.PdfPig/Writer/TokenWriter.cs index 9e9cf1479..dd3d09166 100644 --- a/src/UglyToad.PdfPig/Writer/TokenWriter.cs +++ b/src/UglyToad.PdfPig/Writer/TokenWriter.cs @@ -144,10 +144,12 @@ public void WriteToken(IToken token, Stream outputStream) } /// - public void WriteCrossReferenceTable(IReadOnlyDictionary objectOffsets, + public void WriteCrossReferenceTable( + IReadOnlyDictionary objectOffsets, IndirectReference catalogToken, Stream outputStream, - IndirectReference? documentInformationReference) + IndirectReference? documentInformationReference, + long? prevXrefTableLocation) { if (objectOffsets.Count == 0) { @@ -263,6 +265,11 @@ public void WriteCrossReferenceTable(IReadOnlyDictionary