From 9e5b3b4b92dfa20b625a4fb3fedf16242b252536 Mon Sep 17 00:00:00 2001
From: Ziggy Jonsson
Date: Sat, 11 May 2024 15:48:39 -0400
Subject: [PATCH 1/5] eslint

---
 .github/workflows/test.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index eefdaf4..3f3f108 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -35,4 +35,5 @@ jobs:
         node-version: ${{ matrix.node-version }}
     - run: npm install
     - run: npm run build --if-present
+    - run: npx eslint .
    - run: npm test

From ff7f8a0a9cc664483ca6f00c2aa210b708cac398 Mon Sep 17 00:00:00 2001
From: Ziggy Jonsson
Date: Sat, 11 May 2024 15:54:52 -0400
Subject: [PATCH 2/5] only run on 18

---
 .github/workflows/test.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 3f3f108..eefdaf4 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -35,5 +35,4 @@ jobs:
         node-version: ${{ matrix.node-version }}
     - run: npm install
     - run: npm run build --if-present
-    - run: npx eslint .
    - run: npm test

From 2be696cbcdaec7e458022fb1000874ce3dd417b6 Mon Sep 17 00:00:00 2001
From: Ziggy Jonsson
Date: Sat, 11 May 2024 13:05:07 -0400
Subject: [PATCH 3/5] Move open to async/await
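
Open's internals move from Bluebird .then() chains to async/await;
the exported surface (Open.file, directory.files, directory.extract)
is intended to behave as before. A minimal usage sketch, assuming a
local zip file (the archive and destination paths are hypothetical):

    const unzip = require('unzipper');

    async function main() {
      // centralDirectory() below resolves directory.files from the
      // central directory records at the tail of the archive
      const directory = await unzip.Open.file('example.zip');
      await directory.extract({ path: '/tmp/out', concurrency: 4 });
    }

    main().catch(console.error);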
---
 lib/Open/directory.js | 337 ++++++++++++++++++++----------------------
 lib/Open/unzip.js     |  87 +++++------
 2 files changed, 202 insertions(+), 222 deletions(-)

diff --git a/lib/Open/directory.js b/lib/Open/directory.js
index 88ea27d..98616b8 100644
--- a/lib/Open/directory.js
+++ b/lib/Open/directory.js
@@ -11,29 +11,27 @@ const Bluebird = require('bluebird');
 const signature = Buffer.alloc(4);
 signature.writeUInt32LE(0x06054b50, 0);
 
-function getCrxHeader(source) {
-  const sourceStream = source.stream(0).pipe(PullStream());
-
-  return sourceStream.pull(4).then(function(data) {
-    const signature = data.readUInt32LE(0);
-    if (signature === 0x34327243) {
-      let crxHeader;
-      return sourceStream.pull(12).then(function(data) {
-        crxHeader = parseBuffer.parse(data, [
-          ['version', 4],
-          ['pubKeyLength', 4],
-          ['signatureLength', 4],
-        ]);
-      }).then(function() {
-        return sourceStream.pull(crxHeader.pubKeyLength +crxHeader.signatureLength);
-      }).then(function(data) {
-        crxHeader.publicKey = data.slice(0, crxHeader.pubKeyLength);
-        crxHeader.signature = data.slice(crxHeader.pubKeyLength);
-        crxHeader.size = 16 + crxHeader.pubKeyLength +crxHeader.signatureLength;
-        return crxHeader;
-      });
-    }
-  });
+async function getCrxHeader(source) {
+  var sourceStream = source.stream(0).pipe(PullStream());
+
+  let data = await sourceStream.pull(4);
+  var signature = data.readUInt32LE(0);
+  if (signature === 0x34327243) {
+    var crxHeader;
+    data = await sourceStream.pull(12);
+    crxHeader = parseBuffer.parse(data, [
+      ['version', 4],
+      ['pubKeyLength', 4],
+      ['signatureLength', 4],
+    ]);
+
+    data = await sourceStream.pull(crxHeader.pubKeyLength +crxHeader.signatureLength);
+
+    crxHeader.publicKey = data.slice(0,crxHeader.pubKeyLength);
+    crxHeader.signature = data.slice(crxHeader.pubKeyLength);
+    crxHeader.size = 16 + crxHeader.pubKeyLength +crxHeader.signatureLength;
+    return crxHeader;
+  }
 }
 
 // Zip64 File Format Notes: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
@@ -77,161 +75,148 @@ function parseZip64DirRecord (dir64record) {
   return vars;
 }
 
-module.exports = function centralDirectory(source, options) {
-  const endDir = PullStream();
-  const records = PullStream();
-  const tailSize = (options && options.tailSize) || 80;
-  let sourceSize,
-    crxHeader,
-    startOffset,
-    vars;
+module.exports = async function centralDirectory(source, options) {
+  var endDir = PullStream(),
+    records = PullStream(),
+    tailSize = (options && options.tailSize) || 80,
+    crxHeader,
+    startOffset,
+    vars;
 
   if (options && options.crx)
-    crxHeader = getCrxHeader(source);
-
-  return source.size()
-    .then(function(size) {
-      sourceSize = size;
-
-      source.stream(Math.max(0, size-tailSize))
-        .on('error', function (error) { endDir.emit('error', error); })
-        .pipe(endDir);
-
-      return endDir.pull(signature);
-    })
-    .then(function() {
-      return Bluebird.props({directory: endDir.pull(22), crxHeader: crxHeader});
-    })
-    .then(function(d) {
-      const data = d.directory;
-      startOffset = d.crxHeader && d.crxHeader.size || 0;
-
-      vars = parseBuffer.parse(data, [
-        ['signature', 4],
-        ['diskNumber', 2],
-        ['diskStart', 2],
-        ['numberOfRecordsOnDisk', 2],
-        ['numberOfRecords', 2],
-        ['sizeOfCentralDirectory', 4],
-        ['offsetToStartOfCentralDirectory', 4],
-        ['commentLength', 2],
-      ]);
-
-      // Is this zip file using zip64 format? Use same check as Go:
-      // https://github.com/golang/go/blob/master/src/archive/zip/reader.go#L503
-      // For zip64 files, need to find zip64 central directory locator header to extract
-      // relative offset for zip64 central directory record.
-      if (vars.numberOfRecords == 0xffff|| vars.numberOfRecords == 0xffff ||
-        vars.offsetToStartOfCentralDirectory == 0xffffffff) {
-
-        // Offset to zip64 CDL is 20 bytes before normal CDR
-        const zip64CDLSize = 20;
-        const zip64CDLOffset = sourceSize - (tailSize - endDir.match + zip64CDLSize);
-        const zip64CDLStream = PullStream();
-
-        source.stream(zip64CDLOffset).pipe(zip64CDLStream);
-
-        return zip64CDLStream.pull(zip64CDLSize)
-          .then(function (d) { return getZip64CentralDirectory(source, d); })
-          .then(function (dir64record) {
-            vars = parseZip64DirRecord(dir64record);
-          });
-      } else {
-        vars.offsetToStartOfCentralDirectory += startOffset;
+    crxHeader = await getCrxHeader(source);
+
+  const sourceSize = await source.size();
+
+  source.stream(Math.max(0,sourceSize-tailSize))
+    .on('error', function (error) { endDir.emit('error', error) })
+    .pipe(endDir);
+
+  await endDir.pull(signature);
+
+  var data = await endDir.pull(22);
+  startOffset = crxHeader && crxHeader.size || 0;
+
+  vars = parseBuffer.parse(data, [
+    ['signature', 4],
+    ['diskNumber', 2],
+    ['diskStart', 2],
+    ['numberOfRecordsOnDisk', 2],
+    ['numberOfRecords', 2],
+    ['sizeOfCentralDirectory', 4],
+    ['offsetToStartOfCentralDirectory', 4],
+    ['commentLength', 2],
+  ]);
+
+  // Is this zip file using zip64 format? Use same check as Go:
+  // https://github.com/golang/go/blob/master/src/archive/zip/reader.go#L503
+  // For zip64 files, need to find zip64 central directory locator header to extract
+  // relative offset for zip64 central directory record.
+  if (vars.numberOfRecords == 0xffff|| vars.numberOfRecords == 0xffff ||
+      vars.offsetToStartOfCentralDirectory == 0xffffffff) {
+
+    // Offset to zip64 CDL is 20 bytes before normal CDR
+    const zip64CDLSize = 20
+    const zip64CDLOffset = sourceSize - (tailSize - endDir.match + zip64CDLSize)
+    const zip64CDLStream = PullStream();
+
+    source.stream(zip64CDLOffset).pipe(zip64CDLStream);
+
+    const d = await zip64CDLStream.pull(zip64CDLSize)
+    const dir64record = await getZip64CentralDirectory(source, d);;
+
+    vars = parseZip64DirRecord(dir64record)
+
+  } else {
+    vars.offsetToStartOfCentralDirectory += startOffset;
+  }
+
+  if (vars.commentLength) {
+    const comment = await endDir.pull(vars.commentLength);
+    vars.comment = comment.toString('utf8');
+  };
+
+  source.stream(vars.offsetToStartOfCentralDirectory).pipe(records);
+
+  vars.extract = async function(opts) {
+    if (!opts || !opts.path) throw new Error('PATH_MISSING');
+    // make sure path is normalized before using it
+    opts.path = path.resolve(path.normalize(opts.path));
+    const files = await vars.files;
+
+    return Promise.map(files, function(entry) {
+      if (entry.type == 'Directory') return;
+
+      // to avoid zip slip (writing outside of the destination), we resolve
+      // the target path, and make sure it's nested in the intended
+      // destination, or not extract it otherwise.
+      var extractPath = path.join(opts.path, entry.path);
+      if (extractPath.indexOf(opts.path) != 0) {
+        return;
       }
-    })
-    .then(function() {
-      if (vars.commentLength) return endDir.pull(vars.commentLength).then(function(comment) {
-        vars.comment = comment.toString('utf8');
-      });
-    })
-    .then(function() {
-      source.stream(vars.offsetToStartOfCentralDirectory).pipe(records);
-
-      vars.extract = function(opts) {
-        if (!opts || !opts.path) throw new Error('PATH_MISSING');
-        // make sure path is normalized before using it
-        opts.path = path.resolve(path.normalize(opts.path));
-        return vars.files.then(function(files) {
-          return Bluebird.map(files, function(entry) {
-            if (entry.type == 'Directory') return;
-
-            // to avoid zip slip (writing outside of the destination), we resolve
-            // the target path, and make sure it's nested in the intended
-            // destination, or not extract it otherwise.
-            const extractPath = path.join(opts.path, entry.path);
-            if (extractPath.indexOf(opts.path) != 0) {
-              return;
-            }
-            const writer = opts.getWriter ? opts.getWriter({path: extractPath}) : Writer({ path: extractPath });
-
-            return new Promise(function(resolve, reject) {
-              entry.stream(opts.password)
-                .on('error', reject)
-                .pipe(writer)
-                .on('close', resolve)
-                .on('error', reject);
-            });
-          }, { concurrency: opts.concurrency > 1 ? opts.concurrency : 1 });
-        });
-      };
-
-      vars.files = Bluebird.mapSeries(Array(vars.numberOfRecords), function() {
-        return records.pull(46).then(function(data) {
-          const vars = parseBuffer.parse(data, [
-            ['signature', 4],
-            ['versionMadeBy', 2],
-            ['versionsNeededToExtract', 2],
-            ['flags', 2],
-            ['compressionMethod', 2],
-            ['lastModifiedTime', 2],
-            ['lastModifiedDate', 2],
-            ['crc32', 4],
-            ['compressedSize', 4],
-            ['uncompressedSize', 4],
-            ['fileNameLength', 2],
-            ['extraFieldLength', 2],
-            ['fileCommentLength', 2],
-            ['diskNumber', 2],
-            ['internalFileAttributes', 2],
-            ['externalFileAttributes', 4],
-            ['offsetToLocalFileHeader', 4],
-          ]);
-
-          vars.offsetToLocalFileHeader += startOffset;
-          vars.lastModifiedDateTime = parseDateTime(vars.lastModifiedDate, vars.lastModifiedTime);
-
-          return records.pull(vars.fileNameLength).then(function(fileNameBuffer) {
-            vars.pathBuffer = fileNameBuffer;
-            vars.path = fileNameBuffer.toString('utf8');
-            vars.isUnicode = (vars.flags & 0x800) != 0;
-            return records.pull(vars.extraFieldLength);
-          })
-          .then(function(extraField) {
-            vars.extra = parseExtraField(extraField, vars);
-            return records.pull(vars.fileCommentLength);
-          })
-          .then(function(comment) {
-            vars.comment = comment;
-            vars.type = (vars.uncompressedSize === 0 && /[/\\]$/.test(vars.path)) ? 'Directory' : 'File';
-            const padding = options && options.padding || 1000;
-            vars.stream = function(_password) {
-              const totalSize = 30
-                + padding // add an extra buffer
-                + (vars.extraFieldLength || 0)
-                + (vars.fileNameLength || 0)
-                + vars.compressedSize;
-
-              return unzip(source, vars.offsetToLocalFileHeader, _password, vars, totalSize);
-            };
-            vars.buffer = function(_password) {
-              return BufferStream(vars.stream(_password));
-            };
-            return vars;
-          });
-        });
-      });
-
-      return Bluebird.props(vars);
-    });
+      var writer = opts.getWriter ? opts.getWriter({path: extractPath}) : Writer({ path: extractPath });
+
+      return new Promise(function(resolve, reject) {
+        entry.stream(opts.password)
+          .on('error',reject)
+          .pipe(writer)
+          .on('close',resolve)
+          .on('error',reject);
+      });
+    }, { concurrency: opts.concurrency > 1 ? opts.concurrency : 1 });
+  };
+
+  vars.files = Promise.mapSeries(Array(vars.numberOfRecords),async function() {
+    const data = await records.pull(46)
+    var vars = vars = parseBuffer.parse(data, [
+      ['signature', 4],
+      ['versionMadeBy', 2],
+      ['versionsNeededToExtract', 2],
+      ['flags', 2],
+      ['compressionMethod', 2],
+      ['lastModifiedTime', 2],
+      ['lastModifiedDate', 2],
+      ['crc32', 4],
+      ['compressedSize', 4],
+      ['uncompressedSize', 4],
+      ['fileNameLength', 2],
+      ['extraFieldLength', 2],
+      ['fileCommentLength', 2],
+      ['diskNumber', 2],
+      ['internalFileAttributes', 2],
+      ['externalFileAttributes', 4],
+      ['offsetToLocalFileHeader', 4],
+    ]);
+
+    vars.offsetToLocalFileHeader += startOffset;
+    vars.lastModifiedDateTime = parseDateTime(vars.lastModifiedDate, vars.lastModifiedTime);
+
+    const fileNameBuffer = await records.pull(vars.fileNameLength);
+    vars.pathBuffer = fileNameBuffer;
+    vars.path = fileNameBuffer.toString('utf8');
+    vars.isUnicode = (vars.flags & 0x800) != 0;
+    const extraField = await records.pull(vars.extraFieldLength);
+
+    vars.extra = parseExtraField(extraField, vars);
+    const comment = await records.pull(vars.fileCommentLength);
+
+    vars.comment = comment;
+    vars.type = (vars.uncompressedSize === 0 && /[\/\\]$/.test(vars.path)) ? 'Directory' : 'File';
+    var padding = options && options.padding || 1000;
+    vars.stream = function(_password) {
+      var totalSize = 30
+        + padding // add an extra buffer
+        + (vars.extraFieldLength || 0)
+        + (vars.fileNameLength || 0)
+        + vars.compressedSize;
+
+      return unzip(source, vars.offsetToLocalFileHeader,_password, vars, totalSize);
+    };
+    vars.buffer = function(_password) {
+      return BufferStream(vars.stream(_password));
+    };
+    return vars;
+  });
+
+  return Promise.props(vars);
 };
diff --git a/lib/Open/unzip.js b/lib/Open/unzip.js
index b8d55bf..5cb0ef1 100644
--- a/lib/Open/unzip.js
+++ b/lib/Open/unzip.js
@@ -15,8 +15,11 @@ module.exports = function unzip(source, offset, _password, directoryVars, length
     entry.emit('error', e);
   });
 
-  entry.vars = file.pull(30)
-    .then(function(data) {
+  // Create a separate promise chain to pipe into entry
+  // This allows us to return entry synchronously
+  Promise.resolve()
+    .then(async function () {
+      const data = await file.pull(30);
       let vars = parseBuffer.parse(data, [
         ['signature', 4],
         ['versionsNeededToExtract', 2],
@@ -33,52 +36,44 @@ module.exports = function unzip(source, offset, _password, directoryVars, length
 
       vars.lastModifiedDateTime = parseDateTime(vars.lastModifiedDate, vars.lastModifiedTime);
 
-      return file.pull(vars.fileNameLength)
-        .then(function(fileName) {
-          vars.fileName = fileName.toString('utf8');
-          return file.pull(vars.extraFieldLength);
-        })
-        .then(function(extraField) {
-          let checkEncryption;
-          vars.extra = parseExtraField(extraField, vars);
-          // Ignore logal file header vars if the directory vars are available
-          if (directoryVars && directoryVars.compressedSize) vars = directoryVars;
-
-          if (vars.flags & 0x01) checkEncryption = file.pull(12)
-            .then(function(header) {
-              if (!_password)
-                throw new Error('MISSING_PASSWORD');
-
-              const decrypt = Decrypt();
-
-              String(_password).split('').forEach(function(d) {
-                decrypt.update(d);
-              });
-
-              for (let i=0; i < header.length; i++)
-                header[i] = decrypt.decryptByte(header[i]);
-
-              vars.decrypt = decrypt;
-              vars.compressedSize -= 12;
-
-              const check = (vars.flags & 0x8) ? (vars.lastModifiedTime >> 8) & 0xff : (vars.crc32 >> 24) & 0xff;
-              if (header[11] !== check)
-                throw new Error('BAD_PASSWORD');
-
-              return vars;
-            });
-
-          return Promise.resolve(checkEncryption)
-            .then(function() {
-              entry.emit('vars', vars);
-              return vars;
-            });
-        });
-    });
-
-  entry.vars.then(function(vars) {
-    const fileSizeKnown = !(vars.flags & 0x08) || vars.compressedSize > 0;
-    let eof;
+      const fileName = await file.pull(vars.fileNameLength);
+
+      vars.fileName = fileName.toString('utf8');
+      const extraField = await file.pull(vars.extraFieldLength);
+
+      var checkEncryption;
+      vars.extra = parseExtraField(extraField, vars);
+      // Ignore logal file header vars if the directory vars are available
+      if (directoryVars && directoryVars.compressedSize) vars = directoryVars;
+
+      if (vars.flags & 0x01) {
+        const header = await file.pull(12)
+
+        if (!_password)
+          throw new Error('MISSING_PASSWORD');
+
+        var decrypt = Decrypt();
+
+        String(_password).split('').forEach(function(d) {
+          decrypt.update(d);
+        });
+
+        for (var i=0; i < header.length; i++)
+          header[i] = decrypt.decryptByte(header[i]);
+
+        vars.decrypt = decrypt;
+        vars.compressedSize -= 12;
+
+        var check = (vars.flags & 0x8) ? (vars.lastModifiedTime >> 8) & 0xff : (vars.crc32 >> 24) & 0xff;
+        if (header[11] !== check)
+          throw new Error('BAD_PASSWORD');
+      };
+
+
+      entry.emit('vars',vars);
+
+      var fileSizeKnown = !(vars.flags & 0x08) || vars.compressedSize > 0,
+        eof;
 
       const inflater = vars.compressionMethod ? zlib.createInflateRaw() : Stream.PassThrough();

From c80fd62cd8d387da247ea6b1802c34ae14ee838e Mon Sep 17 00:00:00 2001
From: Ziggy Jonsson
Date: Sat, 11 May 2024 14:38:51 -0400
Subject: [PATCH 4/5] Retry stream if not big enough
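
The range requested from the source is estimated from the central
directory (compressedSize plus padding), which can be smaller than
what the local file header actually requires. unzip() now computes
the real size after parsing the local header and, if the first
request was too short, re-requests the range with the corrected
length, reusing the same entry stream and emitting 'streamRetry'
with the new size. A minimal sketch of observing a retry (archive
path and padding value are hypothetical):

    const unzip = require('unzipper');

    async function main() {
      // a small padding makes the first ranged request too short
      const directory = await unzip.Open.file('example.zip', { padding: 10 });
      const stream = directory.files[0].stream();
      stream.on('streamRetry', size => console.log('retrying with', size, 'bytes'));
    }

    main().catch(console.error);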
---
 lib/Open/unzip.js    | 17 ++++++++++++++---
 test/office-files.js | 26 ++++++++++++++++++++++++--
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/lib/Open/unzip.js b/lib/Open/unzip.js
index 5cb0ef1..079ca7d 100644
--- a/lib/Open/unzip.js
+++ b/lib/Open/unzip.js
@@ -6,9 +6,9 @@ const parseExtraField = require('../parseExtraField');
 const parseDateTime = require('../parseDateTime');
 const parseBuffer = require('../parseBuffer');
 
-module.exports = function unzip(source, offset, _password, directoryVars, length) {
-  const file = PullStream(),
-    entry = Stream.PassThrough();
+module.exports = function unzip(source, offset, _password, directoryVars, length, _entry) {
+  var file = PullStream(),
+    entry = _entry || Stream.PassThrough();
 
   const req = source.stream(offset, length);
   req.pipe(file).on('error', function(e) {
@@ -34,6 +34,17 @@ module.exports = function unzip(source, offset, _password, directoryVars, length
         ['extraFieldLength', 2],
       ]);
 
+      var localSize = 30
+        + 100 // add extra padding
+        + (vars.extraFieldLength || 0)
+        + (vars.fileNameLength || 0)
+        + vars.compressedSize;
+
+      if (localSize > length) {
+        entry.emit('streamRetry', localSize);
+        return unzip(source, offset, _password, directoryVars, localSize, entry);
+      }
+
       vars.lastModifiedDateTime = parseDateTime(vars.lastModifiedDate, vars.lastModifiedTime);
 
       const fileName = await file.pull(vars.fileNameLength);
diff --git a/test/office-files.js b/test/office-files.js
index b7baabe..00068e2 100644
--- a/test/office-files.js
+++ b/test/office-files.js
@@ -2,8 +2,16 @@ const test = require('tap').test;
 const path = require('path');
 const unzip = require('../');
 
-test("get content a docx file without errors", async function () {
-  const archive = path.join(__dirname, '../testData/office/testfile.docx');
+var test = require('tap').test;
+var fs = require('fs');
+var path = require('path');
+var unzip = require('../');
+var il = require('iconv-lite');
+var Promise = require('bluebird');
+var NoopStream = require('../lib/NoopStream');
+
+test("get content a docx file without errors", async function (t) {
+  var archive = path.join(__dirname, '../testData/office/testfile.docx');
 
   const directory = await unzip.Open.file(archive);
   await Promise.all(directory.files.map(file => file.buffer()));
@@ -14,4 +22,18 @@ test("get content a xlsx file without errors", async function () {
 
   const directory = await unzip.Open.file(archive);
   await Promise.all(directory.files.map(file => file.buffer()));
+});
+
+test("stream retries when the local file header indicates bigger size than central directory", async function (t) {
+  var archive = path.join(__dirname, '../testData/office/testfile.xlsx');
+  let retries = 0, size;
+  const directory = await unzip.Open.file(archive, {padding: 10});
+  const stream = directory.files[0].stream();
+  stream.on('streamRetry', _size => {
+    retries += 1;
+    size = _size;
+  });
+  await new Promise(resolve => stream.pipe(NoopStream()).on('finish', resolve));
+  t.ok(retries === 1, 'retries once');
+  t.ok(size > 0, 'size is set');
+});
\ No newline at end of file

From ca37f3b3c9a6ac4b5e33a4d70e4d9e341851a470 Mon Sep 17 00:00:00 2001
From: Ziggy Jonsson
Date: Sat, 11 May 2024 16:08:34 -0400
Subject: [PATCH 5/5] fix eslint
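
Besides const/let and whitespace cleanup, this switches the
Promise.map, Promise.mapSeries and Promise.props calls introduced in
patch 3 back to Bluebird: those helpers exist on Bluebird but not on
the native Promise. For example (the files array and concurrency
value are hypothetical):

    const Bluebird = require('bluebird');

    // native Promise has no map() with a concurrency option
    Bluebird.map(files, file => file.buffer(), { concurrency: 2 });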
---
 lib/Open/directory.js | 77 ++++++++++++++++-----------------
 lib/Open/unzip.js     | 99 +++++++++++++++++++++----------------------
 test/office-files.js  | 15 ++-----
 3 files changed, 90 insertions(+), 101 deletions(-)

diff --git a/lib/Open/directory.js b/lib/Open/directory.js
index 98616b8..97f785c 100644
--- a/lib/Open/directory.js
+++ b/lib/Open/directory.js
@@ -12,22 +12,21 @@ const signature = Buffer.alloc(4);
 signature.writeUInt32LE(0x06054b50, 0);
 
 async function getCrxHeader(source) {
-  var sourceStream = source.stream(0).pipe(PullStream());
+  const sourceStream = source.stream(0).pipe(PullStream());
 
   let data = await sourceStream.pull(4);
-  var signature = data.readUInt32LE(0);
+  const signature = data.readUInt32LE(0);
   if (signature === 0x34327243) {
-    var crxHeader;
     data = await sourceStream.pull(12);
-    crxHeader = parseBuffer.parse(data, [
+    const crxHeader = parseBuffer.parse(data, [
       ['version', 4],
       ['pubKeyLength', 4],
       ['signatureLength', 4],
     ]);
-
+
     data = await sourceStream.pull(crxHeader.pubKeyLength +crxHeader.signatureLength);
 
-    crxHeader.publicKey = data.slice(0,crxHeader.pubKeyLength);
+    crxHeader.publicKey = data.slice(0, crxHeader.pubKeyLength);
     crxHeader.signature = data.slice(crxHeader.pubKeyLength);
     crxHeader.size = 16 + crxHeader.pubKeyLength +crxHeader.signatureLength;
     return crxHeader;
 
 module.exports = async function centralDirectory(source, options) {
-  var endDir = PullStream(),
-    records = PullStream(),
-    tailSize = (options && options.tailSize) || 80,
-    crxHeader,
-    startOffset,
-    vars;
+  const endDir = PullStream();
+  const records = PullStream();
+  const tailSize = (options && options.tailSize) || 80;
+  let crxHeader, vars;
 
   if (options && options.crx)
     crxHeader = await getCrxHeader(source);
 
   const sourceSize = await source.size();
 
-  source.stream(Math.max(0,sourceSize-tailSize))
-    .on('error', function (error) { endDir.emit('error', error) })
+  source.stream(Math.max(0, sourceSize-tailSize))
+    .on('error', function (error) { endDir.emit('error', error); })
     .pipe(endDir);
 
   await endDir.pull(signature);
 
-  var data = await endDir.pull(22);
-  startOffset = crxHeader && crxHeader.size || 0;
+  const data = await endDir.pull(22);
+  const startOffset = crxHeader && crxHeader.size || 0;
 
   vars = parseBuffer.parse(data, [
     ['signature', 4],
     ['diskNumber', 2],
     ['diskStart', 2],
     ['numberOfRecordsOnDisk', 2],
     ['numberOfRecords', 2],
     ['sizeOfCentralDirectory', 4],
     ['offsetToStartOfCentralDirectory', 4],
     ['commentLength', 2],
   ]);
 
   // Is this zip file using zip64 format? Use same check as Go:
   // https://github.com/golang/go/blob/master/src/archive/zip/reader.go#L503
   // For zip64 files, need to find zip64 central directory locator header to extract
   // relative offset for zip64 central directory record.
   if (vars.numberOfRecords == 0xffff|| vars.numberOfRecords == 0xffff ||
       vars.offsetToStartOfCentralDirectory == 0xffffffff) {
 
     // Offset to zip64 CDL is 20 bytes before normal CDR
-    const zip64CDLSize = 20
-    const zip64CDLOffset = sourceSize - (tailSize - endDir.match + zip64CDLSize)
+    const zip64CDLSize = 20;
+    const zip64CDLOffset = sourceSize - (tailSize - endDir.match + zip64CDLSize);
     const zip64CDLStream = PullStream();
 
     source.stream(zip64CDLOffset).pipe(zip64CDLStream);
 
-    const d = await zip64CDLStream.pull(zip64CDLSize)
+    const d = await zip64CDLStream.pull(zip64CDLSize);
     const dir64record = await getZip64CentralDirectory(source, d);;
-
-    vars = parseZip64DirRecord(dir64record)
-
+
+    vars = parseZip64DirRecord(dir64record);
+
   } else {
     vars.offsetToStartOfCentralDirectory += startOffset;
   }
 
   if (vars.commentLength) {
     const comment = await endDir.pull(vars.commentLength);
     vars.comment = comment.toString('utf8');
   };
 
   source.stream(vars.offsetToStartOfCentralDirectory).pipe(records);
 
   vars.extract = async function(opts) {
     if (!opts || !opts.path) throw new Error('PATH_MISSING');
     // make sure path is normalized before using it
     opts.path = path.resolve(path.normalize(opts.path));
     const files = await vars.files;
-
-    return Promise.map(files, function(entry) {
+
+    return Bluebird.map(files, function(entry) {
       if (entry.type == 'Directory') return;
 
       // to avoid zip slip (writing outside of the destination), we resolve
       // the target path, and make sure it's nested in the intended
       // destination, or not extract it otherwise.
-      var extractPath = path.join(opts.path, entry.path);
+      const extractPath = path.join(opts.path, entry.path);
       if (extractPath.indexOf(opts.path) != 0) {
         return;
       }
-      var writer = opts.getWriter ? opts.getWriter({path: extractPath}) : Writer({ path: extractPath });
+      const writer = opts.getWriter ? opts.getWriter({path: extractPath}) : Writer({ path: extractPath });
 
       return new Promise(function(resolve, reject) {
         entry.stream(opts.password)
-          .on('error',reject)
+          .on('error', reject)
           .pipe(writer)
-          .on('close',resolve)
-          .on('error',reject);
+          .on('close', resolve)
+          .on('error', reject);
       });
     }, { concurrency: opts.concurrency > 1 ? opts.concurrency : 1 });
   };
 
-  vars.files = Promise.mapSeries(Array(vars.numberOfRecords),async function() {
-    const data = await records.pull(46)
-    var vars = vars = parseBuffer.parse(data, [
+  vars.files = Bluebird.mapSeries(Array(vars.numberOfRecords), async function() {
+    const data = await records.pull(46);
+    const vars = parseBuffer.parse(data, [
       ['signature', 4],
       ['versionMadeBy', 2],
       ['versionsNeededToExtract', 2],
       ['flags', 2],
       ['compressionMethod', 2],
       ['lastModifiedTime', 2],
       ['lastModifiedDate', 2],
       ['crc32', 4],
       ['compressedSize', 4],
       ['uncompressedSize', 4],
       ['fileNameLength', 2],
       ['extraFieldLength', 2],
       ['fileCommentLength', 2],
       ['diskNumber', 2],
       ['internalFileAttributes', 2],
       ['externalFileAttributes', 4],
       ['offsetToLocalFileHeader', 4],
     ]);
 
     vars.offsetToLocalFileHeader += startOffset;
     vars.lastModifiedDateTime = parseDateTime(vars.lastModifiedDate, vars.lastModifiedTime);
 
     const fileNameBuffer = await records.pull(vars.fileNameLength);
     vars.pathBuffer = fileNameBuffer;
     vars.path = fileNameBuffer.toString('utf8');
     vars.isUnicode = (vars.flags & 0x800) != 0;
     const extraField = await records.pull(vars.extraFieldLength);
 
     vars.extra = parseExtraField(extraField, vars);
-    const comment = await records.pull(vars.fileCommentLength);
-
+    const comment = await records.pull(vars.fileCommentLength);
+
     vars.comment = comment;
-    vars.type = (vars.uncompressedSize === 0 && /[\/\\]$/.test(vars.path)) ? 'Directory' : 'File';
-    var padding = options && options.padding || 1000;
+    vars.type = (vars.uncompressedSize === 0 && /[/\\]$/.test(vars.path)) ? 'Directory' : 'File';
+    const padding = options && options.padding || 1000;
     vars.stream = function(_password) {
-      var totalSize = 30
+      const totalSize = 30
         + padding // add an extra buffer
-        + (vars.extraFieldLength || 0)
+        + (vars.extraFieldLength || 0)
         + (vars.fileNameLength || 0)
         + vars.compressedSize;
 
-      return unzip(source, vars.offsetToLocalFileHeader,_password, vars, totalSize);
+      return unzip(source, vars.offsetToLocalFileHeader, _password, vars, totalSize);
     };
     vars.buffer = function(_password) {
       return BufferStream(vars.stream(_password));
     };
     return vars;
   });
-
-  return Promise.props(vars);
+
+  return Bluebird.props(vars);
 };
diff --git a/lib/Open/unzip.js b/lib/Open/unzip.js
index 079ca7d..9643c1b 100644
--- a/lib/Open/unzip.js
+++ b/lib/Open/unzip.js
@@ -7,8 +7,8 @@ const parseDateTime = require('../parseDateTime');
 const parseBuffer = require('../parseBuffer');
 
 module.exports = function unzip(source, offset, _password, directoryVars, length, _entry) {
-  var file = PullStream(),
-    entry = _entry || Stream.PassThrough();
+  const file = PullStream();
+  const entry = _entry || Stream.PassThrough();
 
   const req = source.stream(offset, length);
   req.pipe(file).on('error', function(e) {
@@ -34,12 +34,12 @@ module.exports = function unzip(source, offset, _password, directoryVars, length
         ['extraFieldLength', 2],
       ]);
 
-      var localSize = 30
+      const localSize = 30
         + 100 // add extra padding
-        + (vars.extraFieldLength || 0)
+        + (vars.extraFieldLength || 0)
         + (vars.fileNameLength || 0)
         + vars.compressedSize;
-
+
       if (localSize > length) {
         entry.emit('streamRetry', localSize);
         return unzip(source, offset, _password, directoryVars, localSize, entry);
       }
 
       vars.lastModifiedDateTime = parseDateTime(vars.lastModifiedDate, vars.lastModifiedTime);
 
       const fileName = await file.pull(vars.fileNameLength);
-
+
       vars.fileName = fileName.toString('utf8');
       const extraField = await file.pull(vars.extraFieldLength);
-
-      var checkEncryption;
+
       vars.extra = parseExtraField(extraField, vars);
       // Ignore logal file header vars if the directory vars are available
       if (directoryVars && directoryVars.compressedSize) vars = directoryVars;
 
       if (vars.flags & 0x01) {
-        const header = await file.pull(12)
-
+        const header = await file.pull(12);
+
         if (!_password)
           throw new Error('MISSING_PASSWORD');
 
-        var decrypt = Decrypt();
+        const decrypt = Decrypt();
 
         String(_password).split('').forEach(function(d) {
           decrypt.update(d);
         });
 
-        for (var i=0; i < header.length; i++)
+        for (let i=0; i < header.length; i++)
           header[i] = decrypt.decryptByte(header[i]);
 
         vars.decrypt = decrypt;
         vars.compressedSize -= 12;
 
-        var check = (vars.flags & 0x8) ? (vars.lastModifiedTime >> 8) & 0xff : (vars.crc32 >> 24) & 0xff;
+        const check = (vars.flags & 0x8) ? (vars.lastModifiedTime >> 8) & 0xff : (vars.crc32 >> 24) & 0xff;
         if (header[11] !== check)
           throw new Error('BAD_PASSWORD');
       };
 
-
-      entry.emit('vars',vars);
-
-      var fileSizeKnown = !(vars.flags & 0x08) || vars.compressedSize > 0,
-        eof;
-
-      const inflater = vars.compressionMethod ? zlib.createInflateRaw() : Stream.PassThrough();
-
-      if (fileSizeKnown) {
-        entry.size = vars.uncompressedSize;
-        eof = vars.compressedSize;
-      } else {
-        eof = Buffer.alloc(4);
-        eof.writeUInt32LE(0x08074b50, 0);
-      }
-
-      let stream = file.stream(eof);
-
-      if (vars.decrypt)
-        stream = stream.pipe(vars.decrypt.stream());
-
-      stream
-        .pipe(inflater)
-        .on('error', function(err) { entry.emit('error', err);})
-        .pipe(entry)
-        .on('finish', function() {
-          if(req.destroy)
-            req.destroy();
-          else if (req.abort)
-            req.abort();
-          else if (req.close)
-            req.close();
-          else if (req.push)
-            req.push();
-          else
-            console.log('warning - unable to close stream');
-        });
-    })
+      entry.emit('vars', vars);
+
+      const fileSizeKnown = !(vars.flags & 0x08) || vars.compressedSize > 0;
+      let eof;
+
+      const inflater = vars.compressionMethod ? zlib.createInflateRaw() : Stream.PassThrough();
+
+      if (fileSizeKnown) {
+        entry.size = vars.uncompressedSize;
+        eof = vars.compressedSize;
+      } else {
+        eof = Buffer.alloc(4);
+        eof.writeUInt32LE(0x08074b50, 0);
+      }
+
+      let stream = file.stream(eof);
+
+      if (vars.decrypt)
+        stream = stream.pipe(vars.decrypt.stream());
+
+      stream
+        .pipe(inflater)
+        .on('error', function(err) { entry.emit('error', err);})
+        .pipe(entry)
+        .on('finish', function() {
+          if(req.destroy)
+            req.destroy();
+          else if (req.abort)
+            req.abort();
+          else if (req.close)
+            req.close();
+          else if (req.push)
+            req.push();
+          else
+            console.log('warning - unable to close stream');
+        });
+    })
     .catch(function(e) {
       entry.emit('error', e);
     });
diff --git a/test/office-files.js b/test/office-files.js
index 00068e2..b0a9fe4 100644
--- a/test/office-files.js
+++ b/test/office-files.js
@@ -1,17 +1,10 @@
 const test = require('tap').test;
 const path = require('path');
 const unzip = require('../');
+const NoopStream = require('../lib/NoopStream');
 
-var test = require('tap').test;
-var fs = require('fs');
-var path = require('path');
-var unzip = require('../');
-var il = require('iconv-lite');
-var Promise = require('bluebird');
-var NoopStream = require('../lib/NoopStream');
-
-test("get content a docx file without errors", async function (t) {
-  var archive = path.join(__dirname, '../testData/office/testfile.docx');
+test("get content a docx file without errors", async function () {
+  const archive = path.join(__dirname, '../testData/office/testfile.docx');
 
   const directory = await unzip.Open.file(archive);
   await Promise.all(directory.files.map(file => file.buffer()));
@@ -25,7 +18,7 @@ test("get content a xlsx file without errors", async function () {
 });
 
 test("stream retries when the local file header indicates bigger size than central directory", async function (t) {
-  var archive = path.join(__dirname, '../testData/office/testfile.xlsx');
+  const archive = path.join(__dirname, '../testData/office/testfile.xlsx');
   let retries = 0, size;
   const directory = await unzip.Open.file(archive, {padding: 10});
   const stream = directory.files[0].stream();