// Demo script: writes `attachment.pdf` to the current working directory.
// The document carries one embedded file (an image read from disk) and two
// file attachment annotations that share one in-memory text file.
const PDFDocument = require('../');
const fs = require('fs');
const path = require('path');

const doc = new PDFDocument({ pdfVersion: '1.4' });

doc.pipe(fs.createWriteStream('attachment.pdf'));

doc.info.Title = 'Attachment Test';

// add an embedded file from file system
doc.file(path.join(__dirname, 'images', 'test.png'), {
  name: 'test.png',
  type: 'image/png',
  description: 'this is a test image'
});

// add some text
// (the attachments are one PNG image and two text files, so the text speaks
// of "attached files" rather than "text files")
doc.text(`This PDF contains three attached files:
Two file attachment annotations and one embedded file.
If you can see them (not every PDF viewer supports embedded files),
hover over the paperclip to see its description!`);

// add a file attachment annotation
// first, declare the file to be attached
const file = {
  src: Buffer.from('buffered input!'),
  name: 'embedded.txt',
  // month argument is 0-indexed: this is 1 April 2020 in local time
  creationDate: new Date(2020, 3, 1)
};
// then, add the annotation
doc.fileAnnotation(100, 150, 10, doc.currentLineHeight(), file);

// declared files can be reused, but they will show up separately in the PDF Viewer's attachments panel
// we're going to use the paperclip icon for this one together with a short description
// be aware that some PDF Viewers may not render the icon correctly — or not at all
doc.fileAnnotation(150, 150, 10, doc.currentLineHeight(), file, {
  Name: 'Paperclip',
  Contents: 'Paperclip attachment'
});

doc.end();
+ + doc.file(path.join(__dirname, 'example.txt')) + +It is also possible to embed data directly as a Buffer, ArrayBuffer or base64 encoded data URL. +If you are embedding data, it is recommended you also specify a filename like this: + + doc.file(Buffer.from('this will be a text file'), { name: 'example.txt' }) + +When embedding a data URL, the `type` option will be set to the data URL's MIME type automatically: + + doc.file('data:text/plain;base64,YmFzZTY0IHN0cmluZw==', { name: 'base64.txt' }) + +There are a few other options for `doc.file`: + +* `name` - specify the embedded file's name +* `type` - specify the embedded file's subtype as a [MIME-Type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types) +* `description` - add descriptive text for the embedded file +* `hidden` - if true, do not show file in the list of embedded files +* `creationDate` - override the date and time the file was created +* `modifiedDate` - override the date and time the file was last updated + +If you are attaching a file from your file system, `creationDate` and `modifiedDate` default to the source file's creation and modification times. + +Setting the `hidden` option prevents this file from showing up in the PDF viewer's attachment panel. +While this may not be very useful for embedded files, it is absolutely necessary for file annotations, to prevent them from showing up twice in the attachment panel. + +## File Annotations + +A file annotation contains a reference to an embedded file that can be placed anywhere in the document. +File annotations show up in your reader's annotation panel as well as the attachment panel. + +In order to add a file annotation, you should first read the chapter on annotations. +Like other annotations, you specify position and size with `x`, `y`, `width` and `height`; unlike other annotations, you must also specify a file object. 
+The file object may contain the same options as `doc.file` in the previous section with the addition of the source file or buffered data in `src`. + +Here is an example of adding a file annotation: + + const file = { + src: path.join(__dirname, 'example.txt'), + name: 'example.txt', + description: 'file annotation description' + } + const options = { Name: 'Paperclip' } + + doc.fileAnnotation(100, 100, 100, 100, file, options) + +The annotation's appearance may be changed by setting the `Name` option to one of the four predefined icons `Graph`, `PushPin` (default value), `Paperclip` or `Tag`. diff --git a/docs/generate.js b/docs/generate.js index d6764088b..2ddce61c6 100644 --- a/docs/generate.js +++ b/docs/generate.js @@ -325,6 +325,7 @@ render(doc, 'images.md'); render(doc, 'outline.md'); render(doc, 'annotations.md'); render(doc, 'destinations.md'); +render(doc, 'attachments.md'); render(doc, 'accessibility.md'); render(doc, 'you_made_it.md'); doc.end(); diff --git a/docs/generate_website.js b/docs/generate_website.js index 36c4d5243..1d08c2bab 100644 --- a/docs/generate_website.js +++ b/docs/generate_website.js @@ -21,6 +21,7 @@ const files = [ 'outline.md', 'annotations.md', 'destinations.md', + 'attachments.md', 'accessibility.md', 'you_made_it.md' ]; diff --git a/docs/guide.pdf b/docs/guide.pdf index 4313c05bb..3c1a28d41 100644 Binary files a/docs/guide.pdf and b/docs/guide.pdf differ diff --git a/lib/document.js b/lib/document.js index c5f9c5bc0..310d3ebf6 100644 --- a/lib/document.js +++ b/lib/document.js @@ -19,6 +19,7 @@ import AnnotationsMixin from './mixins/annotations'; import OutlineMixin from './mixins/outline'; import MarkingsMixin from './mixins/markings'; import AcroFormMixin from './mixins/acroform'; +import AttachmentsMixin from './mixins/attachments'; import LineWrapper from './line_wrapper'; class PDFDocument extends stream.Readable { @@ -212,6 +213,16 @@ class PDFDocument extends stream.Readable { this._root.data.Names.data.Dests.add(name, 
args); } + addNamedEmbeddedFile(name, ref) { + if (!this._root.data.Names.data.EmbeddedFiles) { + // disabling /Limits for this tree fixes attachments not showing in Adobe Reader + this._root.data.Names.data.EmbeddedFiles = new PDFNameTree({ limits: false }); + } + + // add filespec to EmbeddedFiles + this._root.data.Names.data.EmbeddedFiles.add(name, ref); + } + addNamedJavaScript(name, js) { if (!this._root.data.Names.data.JavaScript) { this._root.data.Names.data.JavaScript = new PDFNameTree(); @@ -366,6 +377,7 @@ mixin(AnnotationsMixin); mixin(OutlineMixin); mixin(MarkingsMixin); mixin(AcroFormMixin); +mixin(AttachmentsMixin); PDFDocument.LineWrapper = LineWrapper; diff --git a/lib/mixins/annotations.js b/lib/mixins/annotations.js index 5c7e23413..4b3d9eb23 100644 --- a/lib/mixins/annotations.js +++ b/lib/mixins/annotations.js @@ -129,6 +129,26 @@ export default { return this.annotate(x, y, w, h, options); }, + fileAnnotation(x, y, w, h, file = {}, options = {}) { + // create hidden file + const filespec = this.file( + file.src, + Object.assign({ hidden: true }, file) + ); + + options.Subtype = 'FileAttachment'; + options.FS = filespec; + + // add description from filespec unless description (Contents) has already been set + if (options.Contents) { + options.Contents = new String(options.Contents); + } else if (filespec.data.Desc) { + options.Contents = filespec.data.Desc; + } + + return this.annotate(x, y, w, h, options); + }, + _convertRect(x1, y1, w, h) { // flip y1 and y2 let y2 = y1; diff --git a/lib/mixins/attachments.js b/lib/mixins/attachments.js new file mode 100644 index 000000000..890ce27f5 --- /dev/null +++ b/lib/mixins/attachments.js @@ -0,0 +1,118 @@ +const fs = require('fs'); +const { createHash } = require('crypto'); + +export default { + /** + * Embed contents of `src` in PDF + * @param {Buffer | ArrayBuffer | string} src input Buffer, ArrayBuffer, base64 encoded string or path to file + * @param {object} options + * * options.name: filename to 
const AttachmentsMixin = {
  /**
   * Embed contents of `src` in PDF
   *
   * The data is written as an `EmbeddedFile` stream and wrapped in a
   * `Filespec` object. Unless `options.hidden` is set, the filespec is also
   * registered under its name through `addNamedEmbeddedFile`.
   *
   * If a file with the same name and identical metadata (subtype, checksum,
   * size, creation date and modification date) was embedded before, its
   * stream reference is reused. A new filespec is created on every call.
   *
   * @param {Buffer | ArrayBuffer | string} src input Buffer, ArrayBuffer, base64 encoded data URL or path to file
   * @param {object} options (not modified by this method)
   *  * options.name: filename to be shown in PDF, will use `src` if none set
   *  * options.type: filetype to be shown in PDF
   *  * options.description: description to be shown in PDF
   *  * options.hidden: if true, do not add attachment to EmbeddedFiles dictionary. Useful for file attachment annotations
   *  * options.creationDate: override creation date
   *  * options.modifiedDate: override modified date
   * @returns filespec reference
   * @throws {Error} when `src` is missing; errors raised while reading a file path propagate unchanged
   */
  file(src, options = {}) {
    if (!src) {
      throw new Error('No src specified');
    }

    // keep the caller's options object untouched
    const name = options.name || src;

    const refBody = {
      Type: 'EmbeddedFile',
      Params: {}
    };
    let data;

    if (Buffer.isBuffer(src)) {
      data = src;
    } else if (src instanceof ArrayBuffer) {
      data = Buffer.from(new Uint8Array(src));
    } else {
      const match = /^data:(.*);base64,(.*)$/.exec(src);
      if (match) {
        if (match[1]) {
          // '/' is written as '#2F' in the subtype name
          refBody.Subtype = match[1].replace('/', '#2F');
        }
        data = Buffer.from(match[2], 'base64');
      } else {
        // readFileSync throws when the file cannot be read
        data = fs.readFileSync(src);

        // update CreationDate and ModDate
        // mtime is the content modification time; ctime would also change
        // on chmod/rename and is not a "last modified" date
        const { birthtime, mtime } = fs.statSync(src);
        refBody.Params.CreationDate = birthtime;
        refBody.Params.ModDate = mtime;
      }
    }

    // override creation date and modified date
    if (options.creationDate instanceof Date) {
      refBody.Params.CreationDate = options.creationDate;
    }
    if (options.modifiedDate instanceof Date) {
      refBody.Params.ModDate = options.modifiedDate;
    }
    // add optional subtype
    if (options.type) {
      refBody.Subtype = options.type.replace('/', '#2F');
    }

    // add checksum and size information
    const checksum = createHash('md5')
      .update(data)
      .digest('hex');
    // String object (not a primitive) so it is written as a PDF string
    refBody.Params.CheckSum = new String(checksum);
    refBody.Params.Size = data.byteLength;

    // save some space when embedding the same file again
    // if a file with the same name and metadata exists, reuse its reference
    let ref;
    if (!this._fileRegistry) this._fileRegistry = {};
    const known = this._fileRegistry[name];
    if (known && isEqual(refBody, known)) {
      ref = known.ref;
    } else {
      ref = this.ref(refBody);
      ref.end(data);

      this._fileRegistry[name] = { ...refBody, ref };
    }

    // add filespec for embedded file
    const fileSpecBody = {
      Type: 'Filespec',
      F: new String(name),
      EF: { F: ref },
      UF: new String(name)
    };
    if (options.description) {
      fileSpecBody.Desc = new String(options.description);
    }
    const filespec = this.ref(fileSpecBody);
    filespec.end();

    if (!options.hidden) {
      this.addNamedEmbeddedFile(name, filespec);
    }

    return filespec;
  }
};

export default AttachmentsMixin;

/**
 * Compare two date-like values: `Date` instances by timestamp, anything else
 * (e.g. both `undefined`) by identity.
 */
function isSameDate(a, b) {
  if (a instanceof Date && b instanceof Date) {
    return a.getTime() === b.getTime();
  }
  return a === b;
}

/** check two embedded file metadata objects for equality */
function isEqual(a, b) {
  return (
    a.Subtype === b.Subtype &&
    a.Params.CheckSum.toString() === b.Params.CheckSum.toString() &&
    a.Params.Size === b.Params.Size &&
    // Dates must be compared by value: fs.statSync and callers hand out
    // fresh Date instances, so identity comparison would never match
    isSameDate(a.Params.CreationDate, b.Params.CreationDate) &&
    isSameDate(a.Params.ModDate, b.Params.ModDate)
  );
}
// Specs for `fileAnnotation`: each test attaches an in-memory text file and
// checks the serialized annotation dictionary. In all three cases the
// annotation is object 10 and points at object 9 through `/FS`.
describe('fileAnnotation', () => {
  // No description anywhere: the annotation has no /Contents entry.
  test('creating a fileAnnotation', () => {
    const docData = logData(document);

    document.fileAnnotation(100, 100, 20, 20, {
      src: Buffer.from('example text'),
      name: 'file.txt'
    });

    expect(docData).toContainChunk([
      `10 0 obj`,
      `<<
/Subtype /FileAttachment
/FS 9 0 R
/Type /Annot
/Rect [100 672 120 692]
/Border [0 0 0]
/C [0 0 0]
>>`
    ]);
  });

  // The file's `description` is used as the annotation's /Contents when the
  // annotation options do not provide one.
  test("using the file's description", () => {
    const docData = logData(document);

    document.fileAnnotation(100, 100, 20, 20, {
      src: Buffer.from('example text'),
      name: 'file.txt',
      description: 'file description'
    });

    expect(docData).toContainChunk([
      `10 0 obj`,
      `<<
/Subtype /FileAttachment
/FS 9 0 R
/Contents (file description)
/Type /Annot
/Rect [100 672 120 692]
/Border [0 0 0]
/C [0 0 0]
>>`
    ]);
  });

  // An explicit `Contents` option wins over the file's description. It is
  // serialized first because it is already present on the options object
  // before Subtype and FS are added.
  test("overriding the file's description", () => {
    const docData = logData(document);

    document.fileAnnotation(
      100,
      100,
      20,
      20,
      {
        src: Buffer.from('example text'),
        name: 'file.txt',
        description: 'file description'
      },
      {
        Contents: 'other description'
      }
    );

    expect(docData).toContainChunk([
      `10 0 obj`,
      `<<
/Contents (other description)
/Subtype /FileAttachment
/FS 9 0 R
/Type /Annot
/Rect [100 672 120 692]
/Border [0 0 0]
/C [0 0 0]
>>`
    ]);
  });
});
// Specs for `PDFDocument#file`: embed an in-memory buffer and check the
// serialized EmbeddedFile stream (object 8), its Filespec (object 9) and the
// names dictionary (object 2).
import PDFDocument from '../../lib/document';
import PDFSecurity from '../../lib/security';
import { logData } from './helpers';
import { createHash } from 'crypto';

// manual mock for PDFSecurity to ensure stored id will be the same across different systems
PDFSecurity.generateFileID = () => {
  return Buffer.from('mocked-pdf-id');
};

// fixed date, reused as creation/modification date so the output is stable
const date = new Date(Date.UTC(2018, 1, 1));

// NOTE(review): the expected chunks below are presumably compared as exact
// text, so the leading whitespace inside them must match the serializer's
// output — confirm against lib/tree.js before relying on this copy.
describe('file', () => {
  let document;

  beforeEach(() => {
    document = new PDFDocument({
      info: { CreationDate: date }
    });
  });

  // `type` ends up as /Subtype with '/' written as '#2F'; the file is listed
  // under /EmbeddedFiles by its name.
  test('with name and type', () => {
    const docData = logData(document);

    document.file(Buffer.from('example text'), {
      name: 'file.txt',
      type: 'text/plain',
      creationDate: date,
      modifiedDate: date
    });
    document.end();

    const md5 = createHash('md5')
      .update('example text')
      .digest('hex');

    expect(docData).toContainChunk([
      `8 0 obj`,
      `<<
/Type /EmbeddedFile
/Params <<
/CreationDate (D:20180201000000Z)
/ModDate (D:20180201000000Z)
/CheckSum (${md5})
/Size 12
>>
/Subtype /text#2Fplain
/Length 20
/Filter /FlateDecode
>>`
    ]);

    expect(docData).toContainChunk([
      `9 0 obj`,
      `<<
/Type /Filespec
/F (file.txt)
/EF <<
/F 8 0 R
>>
/UF (file.txt)
>>`
    ]);

    expect(docData).toContainChunk([
      `2 0 obj`,
      `<<
/Dests <<
 /Names [
]
>>
/EmbeddedFiles <<
 /Names [
 (file.txt) 9 0 R
]
>>
>>`
    ]);
  });

  // `description` is written as /Desc on the Filespec; without `type` the
  // stream has no /Subtype.
  test('with description', () => {
    const docData = logData(document);

    document.file(Buffer.from('example text'), {
      name: 'file.txt',
      creationDate: date,
      modifiedDate: date,
      description: 'file description'
    });
    document.end();

    const md5 = createHash('md5')
      .update('example text')
      .digest('hex');

    expect(docData).toContainChunk([
      `8 0 obj`,
      `<<
/Type /EmbeddedFile
/Params <<
/CreationDate (D:20180201000000Z)
/ModDate (D:20180201000000Z)
/CheckSum (${md5})
/Size 12
>>
/Length 20
/Filter /FlateDecode
>>`
    ]);

    expect(docData).toContainChunk([
      `9 0 obj`,
      `<<
/Type /Filespec
/F (file.txt)
/EF <<
/F 8 0 R
>>
/UF (file.txt)
/Desc (file description)
>>`
    ]);
  });

  // A hidden file is still embedded but not listed under /EmbeddedFiles.
  test('with hidden option', () => {
    const docData = logData(document);

    document.file(Buffer.from('example text'), {
      name: 'file.txt',
      creationDate: date,
      modifiedDate: date,
      hidden: true
    });
    document.end();

    const md5 = createHash('md5')
      .update('example text')
      .digest('hex');

    expect(docData).toContainChunk([
      `8 0 obj`,
      `<<
/Type /EmbeddedFile
/Params <<
/CreationDate (D:20180201000000Z)
/ModDate (D:20180201000000Z)
/CheckSum (${md5})
/Size 12
>>
/Length 20
/Filter /FlateDecode
>>`
    ]);

    // hidden: do not add to /EmbeddedFiles
    expect(docData).toContainChunk([
      `2 0 obj`,
      `<<
/Dests <<
 /Names [
]
>>
>>`
    ]);
  });

  // Two files with different names: both appear under /EmbeddedFiles and no
  // /Limits entry is written even though the tree holds more than one key.
  test('attach multiple files', () => {
    const docData = logData(document);

    document.file(Buffer.from('example text'), {
      name: 'file1.txt',
      creationDate: date,
      modifiedDate: date
    });
    document.file(Buffer.from('example text'), {
      name: 'file2.txt',
      creationDate: date,
      modifiedDate: date
    });
    document.end();

    expect(docData).toContainChunk([
      `2 0 obj`,
      `<<
/Dests <<
 /Names [
]
>>
/EmbeddedFiles <<
 /Names [
 (file1.txt) 9 0 R
 (file2.txt) 11 0 R
]
>>
>>`
    ]);
  });
});